diff --git a/.dockerignore b/.dockerignore index 1c6bc1e124015..2c6db205037d4 100644 --- a/.dockerignore +++ b/.dockerignore @@ -17,6 +17,7 @@ .git docker_cache +docs/_build # IDE .vscode @@ -49,7 +50,6 @@ python/dist python/*.egg-info python/*.egg python/*.pyc -python/doc/_build __pycache__/ */__pycache__/ */*/__pycache__/ diff --git a/.gitignore b/.gitignore index 79a2a8e13d424..6bb237af98ec7 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,10 @@ # specific language governing permissions and limitations # under the License. +apache-rat-*.jar +arrow-src.tar +arrow-src.tar.gz + # Compiled source *.a *.dll @@ -26,6 +30,8 @@ .build_cache_dir dependency-reduced-pom.xml MANIFEST +compile_commands.json +build.ninja # Generated Visual Studio files *.vcxproj @@ -33,8 +39,18 @@ MANIFEST *.sln *.iml +# Linux perf sample data +perf.data +perf.data.old + cpp/.idea/ +cpp/apidoc/xml/ +docs/example.gz +docs/example1.dat +docs/example3.dat python/.eggs/ +python/doc/ + .vscode .idea/ .pytest_cache/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3066c5ed4e92b..4e0c7b265311a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,6 +21,14 @@ # To run all hooks on all files use `pre-commit run -a` repos: + - repo: local + hooks: + - id: rat + name: rat + language: system + entry: bash -c "git archive HEAD --prefix=apache-arrow/ --output=arrow-src.tar && ./dev/release/run-rat.sh arrow-src.tar" + always_run: true + pass_filenames: false - repo: git://github.com/pre-commit/pre-commit-hooks sha: v1.2.3 hooks: diff --git a/.travis.yml b/.travis.yml index b877e205b5bd0..02ce11de9b121 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,7 +16,7 @@ # under the License. sudo: required -dist: trusty +dist: xenial services: - docker @@ -38,7 +38,6 @@ before_install: - | if [ $TRAVIS_OS_NAME == "linux" ]; then sudo bash -c "echo -e 'Acquire::Retries 10; Acquire::http::Timeout \"20\";' > /etc/apt/apt.conf.d/99-travis-retry" - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test sudo apt-get update -qq fi - eval `python $TRAVIS_BUILD_DIR/ci/detect-changes.py` @@ -46,12 +45,9 @@ before_install: matrix: fast_finish: true - allow_failures: - - jdk: oraclejdk9 - - language: r include: - # Lint C++, Python, R - - os: linux + - name: "Lint C++, Python, R" + os: linux language: python python: "3.6" env: @@ -62,70 +58,125 @@ matrix: - $TRAVIS_BUILD_DIR/ci/travis_install_clang_tools.sh script: - $TRAVIS_BUILD_DIR/ci/travis_lint.sh - # C++ & Python w/ gcc 4.9 - - compiler: gcc + - name: "C++ unit tests w/ Valgrind, clang 6.0" + language: cpp + os: linux + env: + - ARROW_TRAVIS_VALGRIND=1 + - ARROW_TRAVIS_USE_TOOLCHAIN=1 + - ARROW_TRAVIS_PLASMA=1 + - ARROW_TRAVIS_ORC=1 + - ARROW_TRAVIS_PARQUET=1 + - ARROW_TRAVIS_GANDIVA=1 + - ARROW_TRAVIS_USE_SYSTEM_JAVA=1 + - ARROW_BUILD_WARNING_LEVEL=CHECKIN + before_script: + - if [ $ARROW_CI_CPP_AFFECTED != "1" ]; then exit; fi + - export CC="clang-6.0" + - export CXX="clang++-6.0" + - $TRAVIS_BUILD_DIR/ci/travis_install_linux.sh + - $TRAVIS_BUILD_DIR/ci/travis_install_clang_tools.sh + # If either C++ or Python changed, we must install the C++ libraries + - git submodule update --init + - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh + script: + - $TRAVIS_BUILD_DIR/ci/travis_script_cpp.sh || travis_terminate 1 + # Separating Valgrind and C++ coverage makes individual jobs shorter + - name: "C++ unit tests w/ gcc 5.4, coverage" + compiler: gcc language: cpp os: linux jdk: openjdk8 env: - ARROW_TRAVIS_USE_TOOLCHAIN=1 - - ARROW_TRAVIS_VALGRIND=1 - 
ARROW_TRAVIS_PLASMA=1 - ARROW_TRAVIS_ORC=1 - ARROW_TRAVIS_COVERAGE=1 - ARROW_TRAVIS_PARQUET=1 - - ARROW_TRAVIS_PYTHON_DOCS=1 + - ARROW_TRAVIS_GANDIVA=1 + - ARROW_TRAVIS_GANDIVA_JAVA=1 + - ARROW_TRAVIS_USE_SYSTEM_JAVA=1 - ARROW_BUILD_WARNING_LEVEL=CHECKIN - - ARROW_TRAVIS_PYTHON_JVM=1 - - ARROW_TRAVIS_JAVA_BUILD_ONLY=1 - - ARROW_TRAVIS_PYTHON_GANDIVA=1 - # ARROW-2999 Benchmarks are disabled in Travis CI for the time being - # - ARROW_TRAVIS_PYTHON_BENCHMARKS=1 - - MATRIX_EVAL="CC=gcc-4.9 && CXX=g++-4.9" before_script: - # (ARROW_CI_CPP_AFFECTED implies ARROW_CI_PYTHON_AFFECTED) - - if [ $ARROW_CI_PYTHON_AFFECTED != "1" ]; then exit; fi + - if [ $ARROW_CI_CPP_AFFECTED != "1" ] && [ $ARROW_CI_JAVA_AFFECTED != "1" ]; then exit; fi - $TRAVIS_BUILD_DIR/ci/travis_install_linux.sh - $TRAVIS_BUILD_DIR/ci/travis_install_clang_tools.sh # If either C++ or Python changed, we must install the C++ libraries - git submodule update --init - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh script: - # All test steps are required for accurate C++ coverage info - - $TRAVIS_BUILD_DIR/ci/travis_script_cpp.sh - # Build Arrow Java to test the pyarrow<->JVM in-process bridge - - $TRAVIS_BUILD_DIR/ci/travis_script_java.sh - # Only run Plasma tests with valgrind in one of the Python builds because - # they are slow - - export PLASMA_VALGRIND=0 - - $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 2.7 - - export PLASMA_VALGRIND=1 - - $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 3.6 - - $TRAVIS_BUILD_DIR/ci/travis_upload_cpp_coverage.sh - # Gandiva C++ w/ gcc 4.9 and Java - - compiler: gcc + - $TRAVIS_BUILD_DIR/ci/travis_script_cpp.sh || travis_terminate 1 + - $TRAVIS_BUILD_DIR/ci/travis_script_gandiva_java.sh || travis_terminate 1 + - $TRAVIS_BUILD_DIR/ci/travis_upload_cpp_coverage.sh || travis_terminate 1 + - name: "C++ unit tests w/ gcc 4.8, trusty" + dist: trusty + compiler: gcc language: cpp os: linux jdk: openjdk8 env: - - ARROW_TRAVIS_GANDIVA=1 - ARROW_TRAVIS_USE_TOOLCHAIN=1 - - ARROW_TRAVIS_VALGRIND=1 + - ARROW_TRAVIS_PLASMA=1 + - ARROW_TRAVIS_ORC=1 + - ARROW_TRAVIS_PARQUET=1 + - ARROW_TRAVIS_GANDIVA=1 + - ARROW_TRAVIS_GANDIVA_JAVA=1 - ARROW_BUILD_WARNING_LEVEL=CHECKIN - - MATRIX_EVAL="CC=gcc-4.9 && CXX=g++-4.9" + before_install: + - ulimit -c unlimited -S + - | + if [ $TRAVIS_OS_NAME == "linux" ]; then + sudo bash -c "echo -e 'Acquire::Retries 10; Acquire::http::Timeout \"20\";' > /etc/apt/apt.conf.d/99-travis-retry" + sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test + sudo apt-get update -qq + fi + - eval `python $TRAVIS_BUILD_DIR/ci/detect-changes.py` before_script: - # Run if something changed in CPP or Java. 
- if [ $ARROW_CI_CPP_AFFECTED != "1" ] && [ $ARROW_CI_JAVA_AFFECTED != "1" ]; then exit; fi - $TRAVIS_BUILD_DIR/ci/travis_install_linux.sh - $TRAVIS_BUILD_DIR/ci/travis_install_clang_tools.sh - - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh --only-library + # If either C++ or Python changed, we must install the C++ libraries + - git submodule update --init + - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh script: - - $TRAVIS_BUILD_DIR/ci/travis_script_gandiva_cpp.sh - - $TRAVIS_BUILD_DIR/ci/travis_script_gandiva_java.sh - # [OS X] C++ & Python w/ XCode 6.4 - - compiler: clang + - $TRAVIS_BUILD_DIR/ci/travis_script_cpp.sh || travis_terminate 1 + - $TRAVIS_BUILD_DIR/ci/travis_script_gandiva_java.sh || travis_terminate 1 + - name: "Python 2.7 and 3.6 unit tests w/ Valgrind, gcc 5.4, coverage" + compiler: gcc language: cpp - osx_image: xcode6.4 + os: linux + jdk: openjdk8 + env: + # Valgrind is needed for the Plasma store tests + - ARROW_TRAVIS_VALGRIND=1 + - ARROW_TRAVIS_USE_TOOLCHAIN=1 + - ARROW_TRAVIS_COVERAGE=1 + - ARROW_TRAVIS_PYTHON_DOCS=1 + - ARROW_TRAVIS_PYTHON_JVM=1 + - ARROW_TRAVIS_OPTIONAL_INSTALL=1 + - ARROW_BUILD_WARNING_LEVEL=CHECKIN + - ARROW_TRAVIS_USE_SYSTEM_JAVA=1 + # TODO(wesm): Run the benchmarks outside of Travis + # - ARROW_TRAVIS_PYTHON_BENCHMARKS=1 + before_script: + - if [ $ARROW_CI_PYTHON_AFFECTED != "1" ] && [ $ARROW_CI_DOCS_AFFECTED != "1" ]; then exit; fi + - $TRAVIS_BUILD_DIR/ci/travis_install_linux.sh + - $TRAVIS_BUILD_DIR/ci/travis_install_clang_tools.sh + - $TRAVIS_BUILD_DIR/ci/travis_install_toolchain.sh + script: + - $TRAVIS_BUILD_DIR/ci/travis_script_java.sh || travis_terminate 1 + - ARROW_TRAVIS_PYTHON_GANDIVA=1 + # Only run Plasma tests with valgrind in one of the Python builds because + # they are slow + - export PLASMA_VALGRIND=0 + - $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 2.7 || travis_terminate 1 + - export PLASMA_VALGRIND=1 + - $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 3.6 || travis_terminate 1 + - $TRAVIS_BUILD_DIR/ci/travis_upload_cpp_coverage.sh + - name: "[OS X] C++ w/ XCode 8.3" + compiler: clang + language: cpp + osx_image: xcode8.3 os: osx cache: addons: @@ -134,43 +185,51 @@ matrix: - ARROW_TRAVIS_PLASMA=1 - ARROW_TRAVIS_ORC=1 - ARROW_TRAVIS_PARQUET=1 + - ARROW_TRAVIS_GANDIVA=1 + - ARROW_TRAVIS_GANDIVA_JAVA=1 + - ARROW_TRAVIS_OPTIONAL_INSTALL=1 + - ARROW_TRAVIS_VERBOSE=0 - ARROW_BUILD_WARNING_LEVEL=CHECKIN + # ARROW-3803: The Xcode 8.3 image has Boost libraries in /usr/local/lib + # which can get loaded before the toolchain Boost libraries. These seem to + # get loaded even though we are modifying LD_LIBRARY_PATH. 
We build our own + # Boost and statically link to get around the issue until this can be + # investigated further + - ARROW_TRAVIS_VENDORED_BOOST=1 before_script: - - if [ $ARROW_CI_PYTHON_AFFECTED != "1" ]; then exit; fi + - if [ $ARROW_CI_CPP_AFFECTED != "1" ] && [ $ARROW_CI_JAVA_AFFECTED != "1" ]; then exit; fi # If either C++ or Python changed, we must install the C++ libraries - git submodule update --init - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh script: - - if [ $ARROW_CI_CPP_AFFECTED == "1" ]; then $TRAVIS_BUILD_DIR/ci/travis_script_cpp.sh; fi - - $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 2.7 - - $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 3.6 - # [OS X] Gandiva C++ w/ XCode 8.3 & Java - - compiler: clang + - $TRAVIS_BUILD_DIR/ci/travis_script_cpp.sh || travis_terminate 1 + - $TRAVIS_BUILD_DIR/ci/travis_script_gandiva_java.sh + - name: "[OS X] Python w/ XCode 7.3" + compiler: clang language: cpp - # xcode 7.3 has a bug in strptime. - osx_image: xcode8.3 + osx_image: xcode7.3 os: osx cache: addons: env: - - ARROW_TRAVIS_GANDIVA=1 - ARROW_TRAVIS_USE_TOOLCHAIN=1 - ARROW_BUILD_WARNING_LEVEL=CHECKIN + - ARROW_TRAVIS_OPTIONAL_INSTALL=1 + - MACOSX_DEPLOYMENT_TARGET="10.9" before_script: - # Run if something changed in CPP or Java. - - if [ $ARROW_CI_CPP_AFFECTED != "1" ] && [ $ARROW_CI_JAVA_AFFECTED != "1" ]; then exit; fi - - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh --only-library script: - - $TRAVIS_BUILD_DIR/ci/travis_script_gandiva_cpp.sh - - $TRAVIS_BUILD_DIR/ci/travis_script_gandiva_java.sh - # [manylinux1] Python - - language: cpp + - if [ $ARROW_CI_PYTHON_AFFECTED != "1" ]; then exit; fi + - $TRAVIS_BUILD_DIR/ci/travis_install_toolchain.sh || travis_terminate 1 + - $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 2.7 || travis_terminate 1 + - $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 3.6 + - name: "[manylinux1] Python" + language: cpp before_script: - if [ $ARROW_CI_PYTHON_AFFECTED == "1" ]; then docker pull quay.io/xhochy/arrow_manylinux1_x86_64_base:latest; fi script: - if [ $ARROW_CI_PYTHON_AFFECTED == "1" ]; then $TRAVIS_BUILD_DIR/ci/travis_script_manylinux.sh; fi - # Java w/ OpenJDK 8 - - language: java + - name: "Java w/ OpenJDK 8" + language: java os: linux jdk: openjdk8 before_script: @@ -179,51 +238,55 @@ matrix: script: - $TRAVIS_BUILD_DIR/ci/travis_script_java.sh - $TRAVIS_BUILD_DIR/ci/travis_script_javadoc.sh - # Java w/ Oracle JDK 9 - - language: java + - name: "Java w/ OpenJDK 9" + language: java os: linux - jdk: oraclejdk9 + jdk: openjdk9 before_script: - if [ $ARROW_CI_JAVA_AFFECTED != "1" ]; then exit; fi script: - $TRAVIS_BUILD_DIR/ci/travis_script_java.sh - addons: - apt: - packages: - - oracle-java9-installer - # Integration w/ OpenJDK 8 - - language: java + - name: "Java w/ OpenJDK 11" + language: java + os: linux + jdk: openjdk11 + before_script: + - if [ $ARROW_CI_JAVA_AFFECTED != "1" ]; then exit; fi + script: + - $TRAVIS_BUILD_DIR/ci/travis_script_java.sh + - name: "Integration w/ OpenJDK 8" + language: java os: linux env: ARROW_TEST_GROUP=integration jdk: openjdk8 env: - ARROW_TRAVIS_PLASMA=1 - ARROW_TRAVIS_PLASMA_JAVA_CLIENT=1 - - CC="clang-6.0" - - CXX="clang++-6.0" before_script: - if [ $ARROW_CI_INTEGRATION_AFFECTED != "1" ]; then exit; fi + - export CC="clang-6.0" + - export CXX="clang++-6.0" - $TRAVIS_BUILD_DIR/ci/travis_install_linux.sh - $TRAVIS_BUILD_DIR/ci/travis_install_clang_tools.sh - - nvm install 10.1 + - nvm install 11.6 - $TRAVIS_BUILD_DIR/ci/travis_before_script_js.sh - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh 
script: - $TRAVIS_BUILD_DIR/ci/travis_script_integration.sh - $TRAVIS_BUILD_DIR/ci/travis_script_plasma_java_client.sh - # NodeJS - - language: node_js + - name: "NodeJS" + language: node_js os: linux node_js: - - '10.1' + - '11.6' before_script: - if [ $ARROW_CI_JS_AFFECTED != "1" ]; then exit; fi - $TRAVIS_BUILD_DIR/ci/travis_install_linux.sh - $TRAVIS_BUILD_DIR/ci/travis_before_script_js.sh script: - $TRAVIS_BUILD_DIR/ci/travis_script_js.sh - # C++ & GLib & Ruby w/ gcc 4.9 - - compiler: gcc + - name: "C++ & GLib & Ruby w/ gcc 5.4" + compiler: gcc language: cpp os: linux env: @@ -232,8 +295,6 @@ matrix: - ARROW_TRAVIS_USE_VENDORED_BOOST=1 - ARROW_TRAVIS_PARQUET=1 - ARROW_TRAVIS_PLASMA=1 - - BUILD_TORCH_EXAMPLE=no - - MATRIX_EVAL="CC=gcc-4.9 && CXX=g++-4.9" before_script: - if [ $ARROW_CI_RUBY_AFFECTED != "1" ]; then exit; fi - $TRAVIS_BUILD_DIR/ci/travis_install_linux.sh @@ -244,8 +305,8 @@ matrix: script: - if [ $ARROW_CI_C_GLIB_AFFECTED = "1" ]; then $TRAVIS_BUILD_DIR/ci/travis_script_c_glib.sh; fi - $TRAVIS_BUILD_DIR/ci/travis_script_ruby.sh - # [OS X] C++ & GLib & Ruby w/ XCode 8.3 & homebrew - - compiler: clang + - name: "[OS X] C++ & GLib & Ruby w/ XCode 8.3 & homebrew" + compiler: clang osx_image: xcode8.3 os: osx env: @@ -255,7 +316,6 @@ matrix: - ARROW_TRAVIS_PLASMA=1 cache: addons: - rvm: 2.2 before_script: - if [ $ARROW_CI_RUBY_AFFECTED != "1" ]; then exit; fi - $TRAVIS_BUILD_DIR/ci/travis_install_osx.sh @@ -265,8 +325,8 @@ matrix: script: - if [ $ARROW_CI_C_GLIB_AFFECTED = "1" ]; then $TRAVIS_BUILD_DIR/ci/travis_script_c_glib.sh; fi - $TRAVIS_BUILD_DIR/ci/travis_script_ruby.sh - # Rust - - language: rust + - name: Rust + language: rust cache: cargo addons: apt: @@ -280,16 +340,15 @@ matrix: - if [ $ARROW_CI_RUST_AFFECTED != "1" ]; then exit; fi - $TRAVIS_BUILD_DIR/ci/travis_install_cargo.sh script: - - RUSTUP_TOOLCHAIN=stable $TRAVIS_BUILD_DIR/ci/travis_script_rust.sh || true - RUSTUP_TOOLCHAIN=nightly $TRAVIS_BUILD_DIR/ci/travis_script_rust.sh after_success: - pushd ${TRAVIS_BUILD_DIR}/rust # Run coverage for codecov.io - mkdir -p target/kcov - - RUST_BACKTRACE=1 RUSTUP_TOOLCHAIN=stable cargo coverage --verbose + - RUST_BACKTRACE=1 RUSTUP_TOOLCHAIN=nightly cargo coverage --verbose - bash <(curl -s https://codecov.io/bash) || echo "Codecov did not collect coverage reports" - # Go - - language: go + - name: Go + language: go go_import_path: github.com/apache/arrow os: linux go: @@ -301,10 +360,13 @@ matrix: after_success: - pushd ${TRAVIS_BUILD_DIR}/go/arrow - bash <(curl -s https://codecov.io/bash) || echo "Codecov did not collect coverage reports" - # R - - language: r + - name: R + language: r cache: packages latex: false + dist: trusty + env: + - ARROW_TRAVIS_PARQUET=1 before_install: # Have to copy-paste this here because of how R's build steps work - eval `python $TRAVIS_BUILD_DIR/ci/detect-changes.py` @@ -317,7 +379,6 @@ matrix: fi - $TRAVIS_BUILD_DIR/ci/travis_install_linux.sh - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh --only-library - - $TRAVIS_BUILD_DIR/ci/travis_install_clang_tools.sh - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$TRAVIS_BUILD_DIR/cpp-install/lib - export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:$TRAVIS_BUILD_DIR/cpp-install/lib/pkgconfig - pushd ${TRAVIS_BUILD_DIR}/r diff --git a/CHANGELOG.md b/CHANGELOG.md index 853806cb0bcad..5cacdfdb219ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,610 @@ under the License. 
--> +# Apache Arrow 0.12.0 (16 January 2019) + +## Bug + +* ARROW-1847 - [Doc] Document the difference between RecordBatch and Table in an FAQ fashion +* ARROW-1994 - [Python] Test against Pandas master +* ARROW-2026 - [Python] Cast all timestamp resolutions to INT96 use\_deprecated\_int96\_timestamps=True +* ARROW-2038 - [Python] Follow-up bug fixes for s3fs Parquet support +* ARROW-2113 - [Python] Incomplete CLASSPATH with "hadoop" contained in it can fool the classpath setting HDFS logic +* ARROW-2591 - [Python] Segmentation fault when writing empty ListType column to Parquet +* ARROW-2592 - [Python] Error reading old Parquet file due to metadata backwards compatibility issue +* ARROW-2708 - [C++] Internal GetValues function in arrow::compute should check for nullptr +* ARROW-2970 - [Python] NumPyConverter::Visit for Binary/String/FixedSizeBinary can overflow +* ARROW-3058 - [Python] Feather reads fail with unintuitive error when conversion from pandas yields ChunkedArray +* ARROW-3186 - [GLib] mesonbuild failures in Travis CI +* ARROW-3202 - [C++] Build does not succeed on Alpine Linux +* ARROW-3225 - [C++/Python] Pandas object conversion of ListType and ListType +* ARROW-3324 - [Parquet] Free more internal resources when writing multiple row groups +* ARROW-3343 - [Java] Java tests fail non-deterministically with memory leak from Flight tests +* ARROW-3405 - [Python] Document CSV reader +* ARROW-3428 - [Python] from\_pandas gives incorrect results when converting floating point to bool +* ARROW-3436 - [C++] Boost version required by Gandiva is too new for Ubuntu 14.04 +* ARROW-3437 - [Gandiva][C++] Configure static linking of libgcc, libstdc++ with LDFLAGS +* ARROW-3438 - [Packaging] Escaped bulletpoints in changelog +* ARROW-3445 - [GLib] Parquet GLib doesn't link Arrow GLib +* ARROW-3449 - [C++] Support CMake 3.2 for "out of the box" builds +* ARROW-3466 - [Python] Crash when importing tensorflow and pyarrow +* ARROW-3467 - Building against external double conversion is broken +* ARROW-3470 - [C++] Row-wise conversion tutorial has fallen out of date +* ARROW-3477 - [C++] Testsuite fails on 32 bit arch +* ARROW-3480 - [Website] Install document for Ubuntu is broken +* ARROW-3485 - [C++] Examples fail with Protobuf error +* ARROW-3494 - [C++] re2 conda-forge package not working in toolchain +* ARROW-3516 - [C++] Use unsigned type for difference of pointers in parallel\_memcpy +* ARROW-3517 - [C++] MinGW 32bit build causes g++ segv +* ARROW-3524 - [C++] Fix compiler warnings from ARROW-3409 on clang-6 +* ARROW-3527 - [R] Unused variables in R-package C++ code +* ARROW-3528 - [R] Typo in R documentation +* ARROW-3535 - [Python] pip install tensorflow install too new numpy in manylinux1 build +* ARROW-3541 - [Rust] Update BufferBuilder to allow for new bit-packed BooleanArray +* ARROW-3544 - [Gandiva] Populate function registry in multiple compilation units to mitigate long compile times in release mode +* ARROW-3549 - [Rust] Replace i64 with usize for some bit utility functions +* ARROW-3573 - [Rust] with\_bitset does not set valid bits correctly +* ARROW-3580 - [Gandiva][C++] Build error with g++ 8.2.0 +* ARROW-3586 - [Python] Segmentation fault when converting empty table to pandas with categoricals +* ARROW-3598 - [Plasma] plasma\_store\_server fails linking with GPU enabled +* ARROW-3613 - [Go] Resize does not correctly update the length +* ARROW-3614 - [R] Handle Type::TIMESTAMP from Arrow to R +* ARROW-3658 - [Rust] validation of offsets buffer is incorrect for \`List\` +* 
ARROW-3670 - [C++] Use FindBacktrace to find execinfo.h support +* ARROW-3687 - [Rust] Anything measuring array slots should be \`usize\` +* ARROW-3698 - [C++] Segmentation fault when using a large table in Gandiva +* ARROW-3700 - [C++] CSV parser should allow ignoring empty lines +* ARROW-3703 - [Python] DataFrame.to\_parquet crashes if datetime column has time zones +* ARROW-3707 - [C++] test failure with zstd 1.3.7 +* ARROW-3711 - [C++] Don't pass CXX\_FLAGS to C\_FLAGS +* ARROW-3712 - [CI] License check regression (RAT failure) +* ARROW-3715 - [C++] gflags\_ep fails to build with CMake 3.13 +* ARROW-3716 - [R] Missing cases for ChunkedArray conversion +* ARROW-3728 - [Python] Merging Parquet Files - Pandas Meta in Schema Mismatch +* ARROW-3734 - [C++] Linking static zstd library fails on Arch x86-64 +* ARROW-3740 - [C++] Calling ArrayBuilder::Resize with length smaller than current appended length results in invalid state +* ARROW-3742 - Fix pyarrow.types & gandiva cython bindings +* ARROW-3745 - [C++] CMake passes static libraries multiple times to linker +* ARROW-3754 - [Packaging] Zstd configure error on linux package builds +* ARROW-3756 - [CI/Docker/Java] Java tests are failing in docker-compose setup +* ARROW-3762 - [C++] Parquet arrow::Table reads error when overflowing capacity of BinaryArray +* ARROW-3765 - [Gandiva] Segfault when the validity bitmap has not been allocated +* ARROW-3766 - [Python] pa.Table.from\_pandas doesn't use schema ordering +* ARROW-3768 - [Python] set classpath to hdfs not hadoop executable +* ARROW-3790 - [C++] Signed to unsigned integer cast yields incorrect results when type sizes are the same +* ARROW-3792 - [Python] Segmentation fault when writing empty RecordBatches to Parquet +* ARROW-3793 - [C++] TestScalarAppendUnsafe is not testing unsafe appends +* ARROW-3797 - [Rust] BinaryArray::value\_offset incorrect in offset case +* ARROW-3805 - [Gandiva] handle null validity bitmap in if-else expressions +* ARROW-3831 - [C++] arrow::util::Codec::Decompress() doesn't return decompressed data size +* ARROW-3835 - [C++] arrow::io::CompressedOutputStream::raw() implementation is missing +* ARROW-3837 - [C++] gflags link errors on Windows +* ARROW-3866 - [Python] Column metadata is not transferred to tables in pyarrow +* ARROW-3874 - [Gandiva] Cannot build: LLVM not detected correctly +* ARROW-3879 - [C++] cuda-test failure +* ARROW-3888 - [C++] Compilation warnings with gcc 7.3.0 +* ARROW-3889 - [Python] creating schema with invalid parameters causes segmentation fault +* ARROW-3890 - [Python] Creating Array with explicit string type fails on Python 2.7 +* ARROW-3894 - [Python] Error reading IPC file with no record batches +* ARROW-3898 - parquet-arrow example has compilation errors +* ARROW-3920 - Plasma reference counting not properly done in TensorFlow custom operator.
+* ARROW-3931 - Make possible to build regardless of LANG +* ARROW-3936 - Add \_O\_NOINHERIT to the file open flags on Windows +* ARROW-3937 - [Rust] Rust nightly build is failing +* ARROW-3940 - [Python/Documentation] Add required packages to the development instruction +* ARROW-3941 - [R] RecordBatchStreamReader$schema +* ARROW-3942 - [R] Feather api fixes +* ARROW-3953 - Compat with pandas 0.24 rename of MultiIndex labels -> codes +* ARROW-3955 - [GLib] Add (transfer full) to free when no longer needed +* ARROW-3957 - [Python] Better error message when user connects to HDFS cluster with wrong port +* ARROW-3961 - [Python/Documentation] Fix wrong path in the pyarrow README +* ARROW-3969 - [Rust] CI build broken because rustfmt not available on nightly toolchain +* ARROW-3976 - [Ruby] Homebrew donation solicitation on CLI breaking CI builds +* ARROW-3977 - [Gandiva] gandiva cpp tests not running in CI +* ARROW-3979 - [Gandiva] fix all valgrind reported errors +* ARROW-3980 - [C++] Fix CRTP use in json-simple.cc +* ARROW-3989 - [Rust] CSV reader should handle case sensitivity for boolean values +* ARROW-3996 - [C++] Insufficient description on build +* ARROW-4008 - [C++] Integration test executable failure +* ARROW-4011 - [Gandiva] Refer irhelpers.bc in build directory +* ARROW-4019 - [C++] Fix coverity issues +* ARROW-4033 - [C++] thirdparty/download\_dependencies.sh uses tools or options not available in older Linuxes +* ARROW-4034 - [Ruby] Interface for FileOutputStream doesn't respect append=True +* ARROW-4041 - [CI] Python 2.7 run uses Python 3.6 +* ARROW-4049 - [C++] Arrow never use glog even though glog is linked. +* ARROW-4052 - [C++] Linker errors with glog and gflags +* ARROW-4053 - [Python/Integration] HDFS Tests failing with I/O operation on closed file +* ARROW-4055 - [Python] Fails to convert pytz.utc with versions 2018.3 and earlier +* ARROW-4058 - [C++] arrow-io-hdfs-test fails when run against HDFS cluster from docker-compose +* ARROW-4065 - [C++] arrowTargets.cmake is broken +* ARROW-4066 - Instructions to create Sphinx documentation +* ARROW-4070 - [C++] ARROW\_BOOST\_VENDORED doesn't work properly with ninja build +* ARROW-4073 - [Python] Parquet test failures on AppVeyor +* ARROW-4074 - [Python] test\_get\_library\_dirs\_win32 fails if libraries installed someplace different from conda or wheel packages +* ARROW-4078 - [CI] Run Travis job where documentation is built when docs/ is changed +* ARROW-4088 - [Python] Table.from\_batches() fails when passed a schema with metadata +* ARROW-4089 - [Plasma] The tutorial is wrong regarding the parameter type of PlasmaClient.Create +* ARROW-4101 - [C++] Binary identity cast not implemented +* ARROW-4106 - [Python] Tests fail to run because hypothesis update broke its API +* ARROW-4109 - [Packaging] Missing glog dependency from arrow-cpp conda recipe +* ARROW-4113 - [R] Version number patch broke build +* ARROW-4114 - [C++][DOCUMENTATION] +* ARROW-4115 - [Gandiva] valgrind complains that boolean output data buffer has uninited data +* ARROW-4118 - [Python] Error with "asv run" +* ARROW-4125 - [Python] ASV benchmarks fail to run if Plasma extension is not built (e.g. 
on Windows) +* ARROW-4126 - [Go] offset not used when accessing boolean array +* ARROW-4128 - [C++][DOCUMENTATION] Update style guide to reflect some more exceptions +* ARROW-4130 - [Go] offset not used when accessing binary array +* ARROW-4134 - [Packaging] Properly setup timezone in docker tests to prevent ORC adapter's abort +* ARROW-4135 - [Python] Can't reload a pandas dataframe containing a list of datetime.time +* ARROW-4138 - [Python] setuptools\_scm customization does not work for versions above 0.9.0 on Windows +* ARROW-4147 - [JAVA] Reduce heap usage for variable width vectors +* ARROW-4149 - [CI/C++] Parquet test misses ZSTD compression codec in CMake 3.2 nightly builds +* ARROW-4157 - [C++] -Wdocumentation failures with clang 6.0 on Ubuntu 18.04 +* ARROW-4171 - [Rust] fix parquet crate release version +* ARROW-4173 - JIRA library name is wrong in error message of dev/merge\_arrow\_pr.py +* ARROW-4178 - [C++] Fix TSan and UBSan errors +* ARROW-4179 - [Python] Tests crashing on all platforms in CI +* ARROW-4185 - [Rust] Appveyor builds are broken +* ARROW-4186 - [C++] BitmapWriters clobber the first byte when length=0 +* ARROW-4188 - [Rust] There should be a README in the top level rust directory +* ARROW-4197 - [C++] Emscripten compiler fails building Arrow +* ARROW-4200 - [C++] conda\_env\_\* files cannot be used to create a fresh conda environment on Windows +* ARROW-4209 - [Gandiva] returning IR structs causes issues with windows +* ARROW-4215 - [GLib] Fix typos in documentation +* ARROW-4227 - [GLib] Field in composite data type returns wrong data type +* ARROW-4237 - [Packaging] Fix CMAKE\_INSTALL\_LIBDIR in release verification script +* ARROW-4238 - [Packaging] Fix RC version conflict between crossbow and rake +* ARROW-4246 - [Plasma][Python] PlasmaClient.list doesn't work with CUDA enabled Plasma +* ARROW-4256 - [Release] Update Windows verification script for 0.12 release +* ARROW-4258 - [Python] Safe cast fails from numpy float64 array with nans to integer +* ARROW-4260 - [Python] test\_serialize\_deserialize\_pandas is failing in multiple build entries + +## Improvement + +* ARROW-1423 - [C++] Create non-owned CudaContext from context handle provided by thirdparty user +* ARROW-1688 - [Java] Fail build on checkstyle warnings +* ARROW-1993 - [Python] Add function for determining implied Arrow schema from pandas.DataFrame +* ARROW-2211 - [C++] Use simpler hash functions for integers +* ARROW-2216 - [CI] CI descriptions and envars are misleading +* ARROW-2475 - [Format] Confusing array length description +* ARROW-2483 - [Rust] use bit-packing for boolean vectors +* ARROW-2504 - [Website] Add ApacheCon NA link +* ARROW-2624 - [Python] Random schema and data generator for Arrow conversion and Parquet testing +* ARROW-2637 - [C++/Python] Build support and instructions for development on Alpine Linux +* ARROW-2670 - [C++/Python] Add Ubuntu 18.04 / gcc7 as a nightly build +* ARROW-2673 - [Python] Add documentation + docstring for ARROW-2661 +* ARROW-2684 - [Python] Various documentation improvements +* ARROW-2759 - Export notification socket of Plasma +* ARROW-2803 - [C++] Put hashing function into src/arrow/util +* ARROW-2807 - [Python] Enable memory-mapping to be toggled in get\_reader when reading Parquet files +* ARROW-2808 - [Python] Add unit tests for ProxyMemoryPool, enable new default MemoryPool to be constructed +* ARROW-2919 - [C++] Improve error message when listing empty HDFS file +* ARROW-2968 - [R] Multi-threaded conversion from Arrow table to R data.frame +* 
ARROW-3038 - [Go] add support for StringArray +* ARROW-3063 - [Go] move list of supported/TODO features to confluence +* ARROW-3070 - [Release] Host binary artifacts for RCs and releases on ASF Bintray account instead of dist/mirror system +* ARROW-3131 - [Go] add test for Go-1.11 +* ARROW-3161 - [Packaging] Ensure to run pyarrow unit tests in conda and wheel builds +* ARROW-3169 - [C++] Break array-test.cc and array.cc into multiple compilation units +* ARROW-3199 - [Plasma] Check for EAGAIN in recvmsg and sendmsg +* ARROW-3209 - [C++] Rename libarrow\_gpu to libarrow\_cuda +* ARROW-3230 - [Python] Missing comparisons on ChunkedArray, Table +* ARROW-3233 - [Python] Sphinx documentation for pyarrow.cuda GPU support +* ARROW-3278 - [Python] Retrieve StructType's and StructArray's field by name +* ARROW-3291 - [C++] Convenience API for constructing arrow::io::BufferReader from std::string +* ARROW-3312 - [R] Use same .clang-format file for both R binding C++ code and main C++ codebase +* ARROW-3318 - [C++] Convenience method for reading all batches from an IPC stream or file as arrow::Table +* ARROW-3331 - [C++] Add re2 to ThirdpartyToolchain +* ARROW-3353 - [Packaging] Build python 3.7 wheels +* ARROW-3358 - [Gandiva][C++] Replace usages of gandiva/status.h with arrow/status.h +* ARROW-3362 - [R] Guard against null buffers +* ARROW-3366 - [R] Dockerfile for docker-compose setup +* ARROW-3368 - [Integration/CI/Python] Add dask integration test to docker-compose setup +* ARROW-3402 - [Gandiva][C++] Utilize common bitmap operation implementations in precompiled IR routines +* ARROW-3409 - [C++] Add streaming compression interfaces +* ARROW-3421 - [C++] Add include-what-you-use setup to primary docker-compose.yml +* ARROW-3429 - [Packaging] Add a script to release binaries that use source archive at dist.apache.org +* ARROW-3430 - [Packaging] Add workaround to verify 0.11.0 +* ARROW-3431 - [GLib] Include Gemfile to archive +* ARROW-3432 - [Packaging] Variables aren't expanded in Subversion commit message +* ARROW-3440 - [Gandiva][C++] Remove outdated cpp/src/gandiva/README.md, add build documentation to cpp/README.md +* ARROW-3441 - [Gandiva][C++] Produce fewer test executables +* ARROW-3442 - [C++] Use dynamic linking for unit tests, ensure coverage working properly with clang +* ARROW-3451 - [Python] Allocate CUDA memory from a CUcontext created by numba.cuda +* ARROW-3455 - [Gandiva][C++] Support pkg-config for Gandiva +* ARROW-3456 - [CI] Reuse docker images and optimize docker-compose containers +* ARROW-3460 - [Packaging] Add a script to rebase master on local release branch +* ARROW-3461 - [Packaging] Add a script to upload RC artifacts as the official release +* ARROW-3462 - [Packaging] Update CHANGELOG for 0.11.0 +* ARROW-3463 - [Website] Update for 0.11.0 +* ARROW-3465 - [Documentation] Fix gen\_apidocs' docker image +* ARROW-3473 - [Format] Update Layout.md document to clarify use of 64-bit array lengths +* ARROW-3474 - [GLib] Extend gparquet API with get\_schema and read\_column +* ARROW-3479 - [R] Support to write record\_batch as stream +* ARROW-3482 - [C++] Build with JEMALLOC by default +* ARROW-3488 - [Packaging] Separate crossbow task definition files for packaging and tests +* ARROW-3492 - [C++] Build jemalloc in parallel +* ARROW-3493 - [Java] Document BOUNDS\_CHECKING\_ENABLED +* ARROW-3506 - [Packaging] Nightly tests for docker-compose images +* ARROW-3518 - [C++] Detect HOMEBREW\_PREFIX automatically +* ARROW-3521 - [GLib] Run Python using find\_program in
meson.build +* ARROW-3530 - [Java/Python] Add conversion for pyarrow.Schema from org.apache…pojo.Schema +* ARROW-3533 - [Python/Documentation] Use sphinx\_rtd\_theme instead of Bootstrap +* ARROW-3539 - [CI/Packaging] Update scripts to build against vendored jemalloc +* ARROW-3542 - [C++] Use unsafe appends when building array from CSV +* ARROW-3545 - [C++/Python] Normalize child/field terminology with StructType +* ARROW-3547 - [R] Protect against Null crash when reading from RecordBatch +* ARROW-3548 - Speed up storing small objects in the object store. +* ARROW-3551 - Change MapD to OmniSci on Powered By page +* ARROW-3556 - [CI] Disable optimizations on Windows +* ARROW-3557 - [Python] Set language\_level in Cython sources +* ARROW-3558 - [Plasma] Remove fatal error when plasma client calls get on an unsealed object that it created. +* ARROW-3559 - Statically link libraries for plasma\_store\_server executable. +* ARROW-3562 - [R] Disallow creation of objects with null shared\_ptr +* ARROW-3563 - [C++] Declare public link dependencies so arrow\_static, plasma\_static automatically pull in transitive dependencies +* ARROW-3566 - Clarify that the type of dictionary encoded field should be the encoded(index) type +* ARROW-3574 - Fix remaining bug with plasma static versus shared libraries. +* ARROW-3576 - [Python] Expose compressed file readers as NativeFile +* ARROW-3577 - [Go] add support for ChunkedArray +* ARROW-3581 - [Gandiva][C++] ARROW\_PROTOBUF\_USE\_SHARED isn't used +* ARROW-3582 - [CI] Gandiva C++ build is always triggered +* ARROW-3584 - [Go] add support for Table +* ARROW-3587 - [Python] Efficient serialization for Arrow Objects (array, table, tensor, etc) +* ARROW-3589 - [Gandiva] Make it possible to compile gandiva without JNI +* ARROW-3591 - [R] Support to collect decimal type +* ARROW-3600 - [Packaging] Support Ubuntu 18.10 +* ARROW-3601 - [Rust] Release 0.11.0 +* ARROW-3602 - [Gandiva] [Python] Add preliminary Cython bindings for Gandiva +* ARROW-3603 - [Gandiva][C++] Can't build with vendored Boost +* ARROW-3605 - Remove AE library from plasma header files. +* ARROW-3607 - [Java] delete() method via JNI for plasma +* ARROW-3611 - Give error more quickly when pyarrow serialization context is used incorrectly. 
+* ARROW-3612 - [Go] implement RecordBatch and RecordBatchReader +* ARROW-3615 - [R] Support for NaN +* ARROW-3618 - [Packaging/Documentation] Add \`-c conda-forge\` option to avoid PackagesNotFoundError +* ARROW-3620 - [Python] Document multithreading options in Sphinx and add to api.rst +* ARROW-3621 - [Go] implement TableBatchReader +* ARROW-3622 - [Go] implement Schema.Equal +* ARROW-3623 - [Go] implement Field.Equal +* ARROW-3624 - [Python/C++] Support for zero-sized device buffers +* ARROW-3626 - [Go] add a CSV TableReader +* ARROW-3629 - [Python] Add write\_to\_dataset to Python Sphinx API listing +* ARROW-3632 - [Packaging] Update deb names in dev/tasks/tasks.yml in dev/release/00-prepare.sh +* ARROW-3633 - [Packaging] Update deb names in dev/tasks/tasks.yml for 0.12.0 +* ARROW-3634 - [GLib] cuda.cpp compile error +* ARROW-3636 - [C++/Python] Update arrow/python/pyarrow\_api.h +* ARROW-3638 - [C++][Python] Move reading from Feather as Table feature to C++ from Python +* ARROW-3639 - [Packaging] Run gandiva nightly packaging tasks +* ARROW-3640 - [Go] add support for Tensors +* ARROW-3641 - [C++/Python] remove public keyword from Cython api functions +* ARROW-3642 - [C++] Add arrowConfig.cmake generation +* ARROW-3645 - [Python] Document compression support in Sphinx +* ARROW-3646 - [Python] Add convenience factories to create IO streams +* ARROW-3647 - [R] Crash after unloading bit64 package +* ARROW-3648 - [Plasma] Add API to get metadata and data at the same time +* ARROW-3649 - [Rust] Refactor MutableBuffer's resize +* ARROW-3656 - [C++] Allow whitespace in numeric CSV fields +* ARROW-3657 - [R] Require bit64 package +* ARROW-3659 - [C++] Clang Travis build (matrix entry 2) might not actually be using clang +* ARROW-3661 - [Gandiva][GLib] Improve constant name +* ARROW-3666 - [C++] Improve CSV parser performance +* ARROW-3672 - [Go] implement Time32 array +* ARROW-3673 - [Go] implement Time64 array +* ARROW-3674 - [Go] implement Date32 array +* ARROW-3675 - [Go] implement Date64 array +* ARROW-3677 - [Go] implement FixedSizedBinary array +* ARROW-3681 - [Go] add benchmarks for CSV reader +* ARROW-3682 - [Go] unexport encoding/csv.Reader from CSV reader +* ARROW-3683 - [Go] add functional-option style to CSV reader +* ARROW-3684 - [Go] add chunk size option to CSV reader +* ARROW-3693 - [R] Invalid buffer for empty characters with null data +* ARROW-3694 - [Java] Avoid superfluous string creation when logging level is disabled +* ARROW-3695 - [Gandiva] Use add\_arrow\_lib() +* ARROW-3696 - [C++] Add feather::TableWriter::Write(table) +* ARROW-3697 - [Ruby] Add schema#[] +* ARROW-3704 - [Gandiva] Can't build with g++ 8.2.0 +* ARROW-3708 - [Packaging] Nightly CentOS builds are failing +* ARROW-3718 - [Gandiva] Remove spurious gtest include +* ARROW-3719 - [GLib] Support read/write table to/from Feather +* ARROW-3720 - [GLib] Use "indices" instead of "indexes" +* ARROW-3721 - [Gandiva] [Python] Support all Gandiva literals +* ARROW-3722 - [C++] Allow specifying column types to CSV reader +* ARROW-3724 - [GLib] Update gitignore +* ARROW-3725 - [GLib] Add field readers to GArrowStructDataType +* ARROW-3727 - [Python] Document use of pyarrow.foreign\_buffer, cuda.foreign\_buffer in Sphinx +* ARROW-3733 - [GLib] Add to\_string() to GArrowTable and GArrowColumn +* ARROW-3736 - [CI/Docker] Ninja test in docker-compose run cpp hangs +* ARROW-3743 - [Ruby] Add support for saving/loading Feather +* ARROW-3744 - [Ruby] Use garrow\_table\_to\_string() in Arrow::Table#to\_s +* ARROW-3746 -
[Gandiva] [Python] Make it possible to list all functions registered with Gandiva +* ARROW-3747 - [C++] Flip order of data members in arrow::Decimal128 +* ARROW-3748 - [GLib] Add GArrowCSVReader +* ARROW-3749 - [GLib] Typos in documentation and test case name +* ARROW-3751 - [Python] Add more cython bindings for gandiva +* ARROW-3752 - [C++] Remove unused status::ArrowError +* ARROW-3753 - [Gandiva] Remove debug print +* ARROW-3773 - [C++] Remove duplicated AssertArraysEqual code in parquet/arrow/arrow-reader-writer-test.cc +* ARROW-3778 - [C++] Don't put implementations in test-util.h +* ARROW-3781 - [C++] Configure buffer size in arrow::io::BufferedOutputStream +* ARROW-3784 - [R] Array with type fails with x is not a vector +* ARROW-3785 - [C++] Use double-conversion conda package in CI toolchain +* ARROW-3787 - Implement From for BinaryArray +* ARROW-3788 - [Ruby] Add support for CSV parser written in C++ +* ARROW-3795 - [R] Support for retrieving NAs from INT64 arrays +* ARROW-3796 - [Rust] Add Example for PrimitiveArrayBuilder +* ARROW-3800 - [C++] Vendor a string\_view backport +* ARROW-3803 - [C++/Python] Split C++ and Python unit test Travis CI jobs, run all C++ tests (including Gandiva) together +* ARROW-3819 - [Packaging] Update conda variant files to conform with feedstock after compiler migration +* ARROW-3821 - [Format/Documentation]: Fix typos and grammar issues in Flight.proto comments +* ARROW-3825 - [Python] The Python README.md does not show how to run the unit test suite +* ARROW-3834 - [Doc] Merge Python & C++ and move to top-level +* ARROW-3836 - [C++] Add PREFIX option to ADD\_ARROW\_BENCHMARK +* ARROW-3839 - [Rust] Add ability to infer schema in CSV reader +* ARROW-3841 - [C++] warning: catching polymorphic type by value +* ARROW-3845 - [Gandiva] [GLib] Add GGandivaNode +* ARROW-3847 - [GLib] Remove unnecessary “\”. +* ARROW-3849 - Leverage Armv8 crc32 extension instructions to accelerate the hash computation for Arm64.
+* ARROW-3852 - [C++] used uninitialized warning +* ARROW-3853 - [C++] Implement string to timestamp cast +* ARROW-3854 - [GLib] Deprecate garrow\_gio\_{input,output}\_stream\_get\_raw() +* ARROW-3855 - [Rust] Schema/Field/Datatype should implement serde traits +* ARROW-3856 - [Ruby] Support compressed CSV save/load +* ARROW-3858 - [GLib] Use {class\_name}\_get\_instance\_private +* ARROW-3862 - [C++] Improve dependencies download script +* ARROW-3863 - [GLib] Use travis\_retry with brew bundle command +* ARROW-3865 - [Packaging] Add double-conversion dependency to conda forge recipes and the windows wheel build +* ARROW-3868 - [Rust] Build against nightly Rust in CI +* ARROW-3870 - [C++] Add Peek to InputStream API +* ARROW-3871 - [R] Replace usages of C++ GetValuesSafely with new methods on ArrayData +* ARROW-3878 - [Rust] Improve primitive types +* ARROW-3880 - [Rust] PrimitiveArray should support simple math operations +* ARROW-3883 - [Rust] Update Rust README to reflect new functionality +* ARROW-3884 - [Python] Add LLVM6 to manylinux1 base image +* ARROW-3885 - [Rust] Update version to 0.12.0 and update release instructions on wiki +* ARROW-3886 - [C++] Additional test cases for ARROW-3831 +* ARROW-3893 - [C++] Improve adaptive int builder performance +* ARROW-3895 - [Rust] CSV reader should return Result<Option<RecordBatch>> not Option<Result<RecordBatch>> +* ARROW-3905 - [Ruby] Add StructDataType#[] +* ARROW-3906 - [C++] Break builder.cc into multiple compilation units +* ARROW-3908 - [Rust] Update rust dockerfile to use nightly toolchain +* ARROW-3910 - [Python] Set date\_as\_object to True in \*.to\_pandas as default after deduplicating logic implemented +* ARROW-3911 - [Python] Deduplicate datetime.date objects in Table.to\_pandas internals +* ARROW-3913 - [Gandiva] [GLib] Add GGandivaLiteralNode +* ARROW-3914 - [C++/Python/Packaging] Docker-compose setup for Alpine linux +* ARROW-3922 - [C++] improve the performance of bitmap operations +* ARROW-3925 - [Python] Include autoconf in Linux/macOS dependencies in conda environment +* ARROW-3928 - [Python] Add option to deduplicate PyBytes / PyString / PyUnicode objects in Table.to\_pandas conversion path +* ARROW-3929 - [Go] improve memory usage of CSV reader to improve runtime performances +* ARROW-3930 - [C++] Random test data generation is slow +* ARROW-3932 - [Python/Documentation] Include Benchmarks.md in Sphinx docs +* ARROW-3934 - [Gandiva] Don't compile precompiled tests if ARROW\_GANDIVA\_BUILD\_TESTS=off +* ARROW-3950 - [Plasma] Don't force loading the TensorFlow op on import +* ARROW-3952 - [Rust] Specify edition="2018" in Cargo.toml +* ARROW-3958 - [Plasma] Reduce number of IPCs +* ARROW-3960 - [Rust] remove extern crate for Rust 2018 +* ARROW-3963 - [Packaging/Docker] Nightly test for building sphinx documentation +* ARROW-3964 - [Go] More readable example for csv.Reader +* ARROW-3967 - [Gandiva] [C++] Make gandiva/node.h public +* ARROW-3971 - [Python] Remove APIs deprecated in 0.11 and prior +* ARROW-3974 - [C++] Combine field\_builders\_ and children\_ members in array/builder.h +* ARROW-3982 - [C++] Allow "binary" input in simple JSON format +* ARROW-3984 - [C++] Exit with error if user hits zstd ExternalProject path +* ARROW-3986 - [C++] Write prose documentation +* ARROW-3988 - [C++] Do not build unit tests by default in build system +* ARROW-3994 - [C++] Remove ARROW\_GANDIVA\_BUILD\_TESTS option +* ARROW-3995 - [CI] Use understandable names in Travis Matrix +* ARROW-3997 - [C++] [Doc] Clarify dictionary encoding integer signedness (and width?)
+* ARROW-4002 - [C++][Gandiva] Remove CMake version check +* ARROW-4004 - [GLib] Replace GPU with CUDA +* ARROW-4005 - [Plasma] [GLib] Add gplasma\_client\_disconnect() +* ARROW-4006 - Add CODE\_OF\_CONDUCT.md +* ARROW-4009 - [CI] Run Valgrind and C++ code coverage in different builds +* ARROW-4015 - [Plasma] remove legacy interfaces for plasma manager +* ARROW-4017 - [C++] Check and update vendored libraries +* ARROW-4026 - [C++] Use separate modular $COMPONENT-test targets for unit tests +* ARROW-4029 - [C++] Define and document naming convention for internal / private header files not to be installed +* ARROW-4030 - [CI] Use travis\_terminate to halt builds when a step fails +* ARROW-4035 - [Ruby] Support msys2 mingw dependencies +* ARROW-4037 - [Packaging] Remove workaround to verify 0.11.0 +* ARROW-4038 - [Rust] Add array\_ops methods for boolean AND, OR, NOT +* ARROW-4042 - [Rust] Inconsistent method naming between BinaryArray and PrimitiveArray +* ARROW-4048 - [GLib] Return ChunkedArray instead of Array in gparquet\_arrow\_file\_reader\_read\_column +* ARROW-4051 - [Gandiva] [GLib] Add support for null literal +* ARROW-4054 - [Python] Update gtest, flatbuffers and OpenSSL in manylinux1 base image +* ARROW-4069 - [Python] Add tests for casting from binary to utf8 +* ARROW-4080 - [Rust] Improving lengthy build times in Appveyor +* ARROW-4082 - [C++] CMake tweaks: allow RelWithDebInfo, improve FindClangTools +* ARROW-4084 - [C++] Simplify Status and stringstream boilerplate +* ARROW-4085 - [GLib] Use "field" for struct data type +* ARROW-4087 - [C++] Make CSV nulls configurable +* ARROW-4093 - [C++] Deprecated method suggests wrong method +* ARROW-4098 - [Python] Deprecate pyarrow.open\_stream,open\_file in favor of pa.ipc.open\_stream/open\_file +* ARROW-4102 - [C++] FixedSizeBinary identity cast not implemented +* ARROW-4103 - [Documentation] Add README to docs/ root +* ARROW-4105 - Add rust-toolchain to enforce user to use nightly toolchain for building +* ARROW-4107 - [Python] Use ninja in pyarrow manylinux1 build +* ARROW-4116 - [Python] Clarify in development.rst that virtualenv cannot be used with miniconda/Anaconda +* ARROW-4122 - [C++] Initialize some uninitialized class members +* ARROW-4127 - [Documentation] Add Docker build instructions +* ARROW-4129 - [Python] Fix syntax problem in benchmark docs +* ARROW-4152 - [GLib] Remove an example to show Torch integration +* ARROW-4155 - [Rust] Implement array\_ops::sum() for PrimitiveArray +* ARROW-4158 - [Dev] Allow maintainers to use a GitHub API token when merging pull requests +* ARROW-4160 - [Rust] Add README and executable files to parquet +* ARROW-4168 - [GLib] Use property to keep GArrowDataType passed in garrow\_field\_new() +* ARROW-4177 - [C++] Add ThreadPool and TaskGroup microbenchmarks +* ARROW-4191 - [C++] Use same CC and AR for jemalloc as for the main sources +* ARROW-4199 - [GLib] Add garrow\_seekable\_input\_stream\_peek() +* ARROW-4207 - [Gandiva] [GLib] Add support for IfNode +* ARROW-4211 - [GLib] Add GArrowFixedSizeBinaryDataType +* ARROW-4216 - [Python] Add CUDA API docs +* ARROW-4228 - [GLib] Add garrow\_list\_data\_type\_get\_field() +* ARROW-4229 - [Packaging] Set crossbow target explicitly to enable building arbitrary arrow repo +* ARROW-4233 - [Packaging] Create a Dockerfile to build source archive +* ARROW-4240 - [Packaging] Documents for Plasma GLib and Gandiva GLib are missing in source archive +* ARROW-4243 - [Python] Test failure with pandas 0.24.0rc1 +* ARROW-4249 - [Plasma] Remove reference to
logging.h from plasma/common.h +* ARROW-4257 - [Release] Update release verification script to check binaries on Bintray +* ARROW-4269 - [Python] AttributeError: module 'pandas.core' has no attribute 'arrays' +* ARROW-912 - [Python] Account for multiarch systems in development.rst + +## New Feature + +* ARROW-1019 - [C++] Implement input stream and output stream with Gzip codec +* ARROW-1492 - [C++] Type casting function kernel suite +* ARROW-1696 - [C++] Add codec benchmarks +* ARROW-2712 - [C#] Initial C# .NET library +* ARROW-3020 - [Python] Addition of option to allow empty Parquet row groups +* ARROW-3108 - [C++] arrow::PrettyPrint for Table instances +* ARROW-3126 - [Python] Make Buffered\* IO classes available to Python, incorporate into input\_stream, output\_stream factory functions +* ARROW-3184 - [C++] Add modular build targets, "all" target, and require explicit target when invoking make or ninja +* ARROW-3303 - [C++] Enable example arrays to be written with a simplified JSON representation +* ARROW-3306 - [R] Objects and support functions different kinds of arrow::Buffer +* ARROW-3307 - [R] Convert chunked arrow::Column to R vector +* ARROW-3310 - [R] Create wrapper classes for various Arrow IO interfaces +* ARROW-3340 - [R] support for dates and time classes +* ARROW-3355 - [R] Support for factors +* ARROW-3380 - [Python] Support reading CSV files and more from a gzipped file +* ARROW-3381 - [C++] Implement InputStream for bz2 files +* ARROW-3387 - [C++] Function to cast binary to string/utf8 with UTF8 validation +* ARROW-3398 - [Rust] Update existing Builder to use MutableBuffer internally +* ARROW-3407 - [C++] Add UTF8 conversion modes in CSV reader conversion options +* ARROW-3439 - [R] R language bindings for Feather format +* ARROW-3450 - [R] Wrap MemoryMappedFile class +* ARROW-3490 - [R] streaming arrow objects to output streams +* ARROW-3499 - [R] Expose arrow::ipc::Message type +* ARROW-3504 - [Plasma] Add support for Plasma Client to put/get raw bytes without pyarrow serialization. +* ARROW-3505 - [R] Read record batch and table +* ARROW-3515 - Introduce NumericTensor class +* ARROW-3529 - [Ruby] Import Red Parquet +* ARROW-3536 - [C++] Fast UTF8 validation functions +* ARROW-3537 - [Rust] Implement Tensor Type +* ARROW-3540 - [Rust] Incorporate BooleanArray into PrimitiveArray +* ARROW-3555 - [Plasma] Unify plasma client get function using metadata. 
+* ARROW-3567 - [Gandiva] [GLib] Add GLib bindings of Gandiva +* ARROW-3583 - [Python/Java] Create RecordBatch from VectorSchemaRoot +* ARROW-3592 - [Python] Get BinaryArray value as zero copy memory view +* ARROW-3608 - [R] Support for time32 and time64 array types +* ARROW-3610 - [C++] Add interface to turn stl\_allocator into arrow::MemoryPool +* ARROW-3630 - [Plasma] [GLib] Add GLib bindings of Plasma +* ARROW-3660 - [C++] Don't unnecessarily lock MemoryMappedFile for resizing in readonly files +* ARROW-3662 - [C++] Add a const overload to MemoryMappedFile::GetSize +* ARROW-3692 - [Gandiva] [Ruby] Add Ruby bindings of Gandiva +* ARROW-3723 - [Plasma] [Ruby] Add Ruby bindings of Plasma +* ARROW-3726 - [Rust] CSV Reader & Writer +* ARROW-3731 - [R] R API for reading and writing Parquet files +* ARROW-3738 - [C++] Add CSV conversion option to parse ISO8601-like timestamp strings +* ARROW-3741 - [R] Add support for arrow::compute::Cast to convert Arrow arrays from one type to another +* ARROW-3755 - [GLib] Support for CompressedInputStream, CompressedOutputStream +* ARROW-3760 - [R] Support Arrow CSV reader +* ARROW-3782 - [C++] Implement BufferedReader for C++ +* ARROW-3798 - [GLib] Add support for column type CSV read options +* ARROW-3807 - [R] Missing Field API +* ARROW-3823 - [R] + buffer.complex +* ARROW-3830 - [GLib] Add GArrowCodec +* ARROW-3842 - [R] RecordBatchStreamWriter api +* ARROW-3864 - [GLib] Add support for allow-float-truncate cast option +* ARROW-3900 - [GLib] Add garrow\_mutable\_buffer\_set\_data() +* ARROW-3912 - [Plasma][GLib] Add support for creating and referring objects +* ARROW-3916 - [Python] Support caller-provided filesystem in \`ParquetWriter\` constructor +* ARROW-3924 - [Packaging][Plasma] Add support for Plasma deb/rpm packages +* ARROW-3938 - [Packaging] Stop to refer java/pom.xml to get version information +* ARROW-3945 - [Website] Blog post about Gandiva code donation +* ARROW-3946 - [GLib] Add support for union +* ARROW-3959 - [Rust] Time and Timestamp Support +* ARROW-4028 - [Rust] Merge parquet-rs codebase +* ARROW-4112 - [Packaging][Gandiva] Add support for deb packages +* ARROW-4132 - [GLib] Add more GArrowTable constructors +* ARROW-4141 - [Ruby] Add support for creating schema from raw Ruby objects +* ARROW-4153 - [GLib] Add builder\_append\_value() for consistency +* ARROW-4154 - [GLib] Add GArrowDecimal128DataType +* ARROW-4161 - [GLib] Add GPlasmaClientOptions +* ARROW-4162 - [Ruby] Add support for creating data types from description +* ARROW-4166 - [Ruby] Add support for saving to and loading from buffer +* ARROW-4174 - [Ruby] Add support for building composite array from raw Ruby objects +* ARROW-4175 - [GLib] Add support for decimal compare operators +* ARROW-4183 - [Ruby] Add Arrow::Struct as an element of Arrow::StructArray +* ARROW-4184 - [Ruby] Add Arrow::RecordBatch#to\_table +* ARROW-4214 - [Ruby] Add support for building RecordBatch from raw Ruby objects +* ARROW-45 - [Python] Add unnest/flatten function for List types +* ARROW-554 - [C++] Implement functions to conform unequal dictionaries amongst multiple Arrow arrays +* ARROW-854 - [Format] Support sparse tensor + +## Sub-task + +* ARROW-3272 - [Java] Document checkstyle deviations from Google style guide +* ARROW-3273 - [Java] checkstyle - fix javadoc style +* ARROW-3323 - [Java] checkstyle - fix naming +* ARROW-3347 - [Rust] Implement PrimitiveArrayBuilder +* ARROW-3568 - [Packaging] Run pyarrow unittests for windows wheels +* ARROW-3569 - [Packaging] Run pyarrow unittests 
when building conda package +* ARROW-3588 - [Java] checkstyle - fix license +* ARROW-3616 - [Java] checkstyle - fix remaining coding checks +* ARROW-3664 - [Rust] Add benchmark for PrimitiveArrayBuilder +* ARROW-3665 - [Rust] Implement StructArrayBuilder +* ARROW-3713 - [Rust] Implement BinaryArrayBuilder +* ARROW-3891 - [Java] Remove Long.bitCount with simple bitmap operations +* ARROW-3939 - [Rust] Remove macro definition for ListArrayBuilder +* ARROW-3948 - [CI][GLib] Set timeout to Homebrew +* ARROW-4060 - [Rust] Add Parquet/Arrow schema converter +* ARROW-4075 - [Rust] Reuse array builder after calling finish() +* ARROW-4172 - [Rust] more consistent naming in array builders + +## Task + +* ARROW-2337 - [Scripts] Windows release verification script should use boost DSOs instead of static linkage +* ARROW-2535 - [Python] Provide pre-commit hooks that check flake8 +* ARROW-2560 - [Rust] The Rust README should include Rust-specific information on contributing +* ARROW-2653 - [C++] Refactor hash table support +* ARROW-2720 - [C++] Clean up cmake CXX\_STANDARD and PIC flag setting +* ARROW-3194 - [Java] Fix setValueCount in splitAndTransfer for variable width vectors +* ARROW-3383 - [Java] Run Gandiva tests in Travis CI +* ARROW-3384 - [Gandiva] Sync remaining commits from gandiva repo +* ARROW-3385 - [Java] [Gandiva] Deploy gandiva snapshot jars automatically +* ARROW-3427 - [C++] Add Windows support, Unix static libs for double-conversion package in conda-forge +* ARROW-3469 - [Gandiva] add travis entry for gandiva on OSX +* ARROW-3472 - [Gandiva] remove gandiva helpers library +* ARROW-3487 - [Gandiva] simplify NULL\_IF\_NULL functions that can return errors +* ARROW-3489 - [Gandiva] Support for in expressions +* ARROW-3501 - [Gandiva] Enable building with gcc 4.8.x on Ubuntu Trusty, similar distros +* ARROW-3519 - [Gandiva] Add support for functions that can return variable len output +* ARROW-3597 - [Gandiva] gandiva should integrate with ADD\_ARROW\_TEST for tests +* ARROW-3609 - [Gandiva] Move benchmark tests out of unit test +* ARROW-3701 - [Gandiva] Add support for decimal operations +* ARROW-3859 - [Java] Fix ComplexWriter backward incompatible change +* ARROW-3860 - [Gandiva] [C++] Add option to use -static-libstdc++ when building libgandiva\_jni.so +* ARROW-3867 - [Documentation] Uploading binary release artifacts to Bintray +* ARROW-3970 - [Gandiva][C++] Remove unnecessary boost dependencies +* ARROW-3983 - [Gandiva][Crossbow] Use static boost while packaging +* ARROW-3993 - [JS] CI Jobs Failing +* ARROW-4039 - Update link to 'development.rst' page from Python README.md +* ARROW-4043 - [Packaging/Docker] Python tests on alpine miss pytest dependency +* ARROW-4044 - [Packaging/Python] Add hypothesis test dependency to pyarrow conda recipe +* ARROW-4045 - [Packaging/Python] Add hypothesis test dependency to wheel crossbow tests +* ARROW-4100 - [Gandiva][C++] Fix regex to ignore "."
character +* ARROW-4148 - [CI/Python] Disable ORC on nightly Alpine builds +* ARROW-4151 - [Rust] Restructure project directories +* ARROW-4210 - [Python] Mention boost-cpp directly in the conda meta.yaml for pyarrow +* ARROW-4239 - [Release] Updating .deb package names in the prepare script failed to run on OSX +* ARROW-4241 - [Packaging] Disable crossbow conda OSX clang builds +* ARROW-4266 - [Python][CI] Disable ORC tests in dask integration test +* ARROW-4270 - [Packaging][Conda] Update xcode version and remove toolchain builds + +## Test + +* ARROW-4137 - [Rust] Move parquet code into a separate crate + +## Wish + +* ARROW-3248 - [C++] Arrow tests should have label "arrow" +* ARROW-3260 - [CI] Make linting a separate job +* ARROW-3844 - [C++] Remove ARROW\_USE\_SSE and ARROW\_SSE3 +* ARROW-3851 - [C++] "make check-format" is slow +* ARROW-4079 - [C++] Add machine benchmarks +* ARROW-4150 - [C++] Do not return buffers containing nullptr from internal allocations +* ARROW-4156 - [C++] xcodebuild failure for cmake generated project + # Apache Arrow 0.11.0 (08 October 2018) ## Bug @@ -2620,3 +3224,4 @@ * ARROW-260 - TestValueVector.testFixedVectorReallocation and testVariableVectorReallocation are flaky * ARROW-83 - Add basic test infrastructure for DecimalType + diff --git a/python/doc/Benchmarks.md b/CODE_OF_CONDUCT.md similarity index 69% rename from python/doc/Benchmarks.md rename to CODE_OF_CONDUCT.md index c84bf0dc1eb62..2efe740b77c50 100644 --- a/python/doc/Benchmarks.md +++ b/CODE_OF_CONDUCT.md @@ -16,14 +16,9 @@ specific language governing permissions and limitations under the License. --> -## Benchmark Requirements -The benchmarks are run using [asv][1] which is also their only requirement. +# Code of Conduct -## Running the benchmarks +* [Code of Conduct for The Apache Software Foundation][1] -To run the benchmarks, call `asv run --python=same`. You cannot use the -plain `asv run` command at the moment as asv cannot handle python packages -in subdirectories of a repository. - -[1]: https://asv.readthedocs.org/ +[1]: https://www.apache.org/foundation/policies/conduct.html \ No newline at end of file diff --git a/LICENSE.txt b/LICENSE.txt index 5c9aaddc14ff8..4bb80b93de459 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -681,7 +681,11 @@ See the License for the specific language governing permissions and limitations under the License. -------------------------------------------------------------------------------- -The file cpp/src/arrow/util/date.h has the following license (MIT) +The files cpp/src/arrow/vendored/datetime/date.h, cpp/src/arrow/vendored/datetime/tz.h, +cpp/src/arrow/vendored/datetime/tz_private.h, cpp/src/arrow/vendored/datetime/ios.h, +cpp/src/arrow/vendored/datetime/tz.cpp are adapted from +Howard Hinnant's date library (https://github.com/HowardHinnant/date) +It is licensed under MIT license. The MIT License (MIT) Copyright (c) 2015, 2016, 2017 Howard Hinnant @@ -736,7 +740,7 @@ SOFTWARE. -------------------------------------------------------------------------------- -The file cpp/src/util/string_view/string_view.hpp has the following license +The file cpp/src/arrow/vendored/string_view.hpp has the following license Boost Software License - Version 1.0 - August 17th, 2003 @@ -764,7 +768,7 @@ DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- -The files in cpp/src/arrow/util/xxhash/ have the following license +The files in cpp/src/arrow/vendored/xxhash/ have the following license (BSD 2-Clause License) xxHash Library @@ -795,3 +799,36 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You can contact the author at : - xxHash homepage: http://www.xxhash.com - xxHash source repository : https://github.com/Cyan4973/xxHash + +-------------------------------------------------------------------------------- + +The files in dev/tasks/conda-recipes/variants have the following license + +BSD 3-clause license +Copyright (c) 2015-2018, conda-forge +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR +TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/appveyor.yml b/appveyor.yml index 18ad9f5f56c5d..d955484ec8362 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -22,6 +22,7 @@ only_commits: # Skip commits not related to Python, C++ or Rust files: - appveyor.yml + - c_glib/ - ci/ - cpp/ - format/ @@ -34,10 +35,19 @@ cache: matrix: fast_finish: true + allow_failures: + # Can't build with 32-bit MinGW for now. 
+ # See https://issues.apache.org/jira/browse/ARROW-4297 + - JOB: "MinGW32" + MINGW_PACKAGE_PREFIX: mingw-w64-i686 + MINGW_PREFIX: c:\msys64\mingw32 + MSYSTEM: MINGW32 + USE_CLCACHE: false environment: global: USE_CLCACHE: true + ARROW_BUILD_GANDIVA: "OFF" PYTHON: "3.6" ARCH: "64" @@ -60,13 +70,21 @@ environment: GENERATOR: Ninja CONFIGURATION: "Release" BUILD_SCRIPT: "CMake_Build_Script" + - JOB: "MinGW32" + MINGW_PACKAGE_PREFIX: mingw-w64-i686 + MINGW_PREFIX: c:\msys64\mingw32 + MSYSTEM: MINGW32 + USE_CLCACHE: false + - JOB: "MinGW64" + MINGW_PACKAGE_PREFIX: mingw-w64-x86_64 + MINGW_PREFIX: c:\msys64\mingw64 + MSYSTEM: MINGW64 + USE_CLCACHE: false - JOB: "Rust" TARGET: x86_64-pc-windows-msvc USE_CLCACHE: false MSVC_DEFAULT_OPTIONS: ON - BOOST_ROOT: C:\Libraries\boost_1_67_0 - BOOST_LIBRARYDIR: C:\Libraries\boost_1_67_0\lib64-msvc-14.0 APPVEYOR_SAVE_CACHE_ON_ERROR: true install: diff --git a/c_glib/.gitignore b/c_glib/.gitignore index cc7a19348af0c..18f952e0b3727 100644 --- a/c_glib/.gitignore +++ b/c_glib/.gitignore @@ -51,12 +51,12 @@ Makefile.in /libtool /m4/ /stamp-h1 +/arrow-cuda-glib/*.pc /arrow-glib/enums.c /arrow-glib/enums.h /arrow-glib/stamp-* /arrow-glib/version.h /arrow-glib/*.pc -/arrow-gpu-glib/*.pc /gandiva-glib/*.pc /parquet-glib/*.pc /plasma-glib/*.pc diff --git a/c_glib/Dockerfile b/c_glib/Dockerfile index 5d64a5f154f62..7abfa17a6b678 100644 --- a/c_glib/Dockerfile +++ b/c_glib/Dockerfile @@ -17,9 +17,7 @@ FROM arrow:cpp -ENV DEBIAN_FRONTEND=noninteractive RUN apt-get -q install --no-install-recommends -y \ - tzdata \ ruby-dev \ pkg-config \ autoconf-archive \ @@ -27,7 +25,7 @@ RUN apt-get -q install --no-install-recommends -y \ libgirepository1.0-dev ADD c_glib/Gemfile /arrow/c_glib/ -RUN conda install -c conda-forge meson=0.47.1 && \ +RUN conda install meson=0.47.1 && \ conda clean --all && \ gem install bundler && \ bundle install --gemfile arrow/c_glib/Gemfile diff --git a/c_glib/Makefile.am b/c_glib/Makefile.am index d21555e12bb2f..53bb57e411b0c 100644 --- a/c_glib/Makefile.am +++ b/c_glib/Makefile.am @@ -19,13 +19,12 @@ ACLOCAL_AMFLAGS = -I m4 ${ACLOCAL_FLAGS} SUBDIRS = \ arrow-glib \ - arrow-gpu-glib \ + arrow-cuda-glib \ gandiva-glib \ parquet-glib \ plasma-glib \ doc \ - example \ - tool + example EXTRA_DIST = \ Gemfile \ diff --git a/c_glib/arrow-gpu-glib/Makefile.am b/c_glib/arrow-cuda-glib/Makefile.am similarity index 64% rename from c_glib/arrow-gpu-glib/Makefile.am rename to c_glib/arrow-cuda-glib/Makefile.am index a1249035a5a70..2e3848d2a0e2c 100644 --- a/c_glib/arrow-gpu-glib/Makefile.am +++ b/c_glib/arrow-cuda-glib/Makefile.am @@ -24,51 +24,51 @@ AM_CPPFLAGS = \ -I$(top_builddir) \ -I$(top_srcdir) -if HAVE_ARROW_GPU +if HAVE_ARROW_CUDA lib_LTLIBRARIES = \ - libarrow-gpu-glib.la + libarrow-cuda-glib.la -libarrow_gpu_glib_la_CXXFLAGS = \ +libarrow_cuda_glib_la_CXXFLAGS = \ $(GLIB_CFLAGS) \ $(ARROW_CFLAGS) \ - $(ARROW_GPU_CFLAGS) \ + $(ARROW_CUDA_CFLAGS) \ $(GARROW_CXXFLAGS) -libarrow_gpu_glib_la_LDFLAGS = \ +libarrow_cuda_glib_la_LDFLAGS = \ -version-info $(LT_VERSION_INFO) \ -no-undefined -libarrow_gpu_glib_la_LIBADD = \ +libarrow_cuda_glib_la_LIBADD = \ $(GLIB_LIBS) \ $(ARROW_LIBS) \ - $(ARROW_GPU_LIBS) \ + $(ARROW_CUDA_LIBS) \ ../arrow-glib/libarrow-glib.la -libarrow_gpu_glib_la_headers = \ - arrow-gpu-glib.h \ +libarrow_cuda_glib_la_headers = \ + arrow-cuda-glib.h \ cuda.h -libarrow_gpu_glib_la_sources = \ +libarrow_cuda_glib_la_sources = \ cuda.cpp \ - $(libarrow_gpu_glib_la_headers) + $(libarrow_cuda_glib_la_headers) -libarrow_gpu_glib_la_cpp_headers 
= \ - arrow-gpu-glib.hpp \ +libarrow_cuda_glib_la_cpp_headers = \ + arrow-cuda-glib.hpp \ cuda.hpp -libarrow_gpu_glib_la_SOURCES = \ - $(libarrow_gpu_glib_la_sources) \ - $(libarrow_gpu_glib_la_cpp_headers) +libarrow_cuda_glib_la_SOURCES = \ + $(libarrow_cuda_glib_la_sources) \ + $(libarrow_cuda_glib_la_cpp_headers) -arrow_gpu_glib_includedir = \ - $(includedir)/arrow-gpu-glib -arrow_gpu_glib_include_HEADERS = \ - $(libarrow_gpu_glib_la_headers) \ - $(libarrow_gpu_glib_la_cpp_headers) +arrow_cuda_glib_includedir = \ + $(includedir)/arrow-cuda-glib +arrow_cuda_glib_include_HEADERS = \ + $(libarrow_cuda_glib_la_headers) \ + $(libarrow_cuda_glib_la_cpp_headers) pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = \ - arrow-gpu-glib.pc + arrow-cuda-glib.pc if HAVE_INTROSPECTION -include $(INTROSPECTION_MAKEFILE) @@ -85,39 +85,39 @@ endif INTROSPECTION_COMPILER_ARGS = \ --includedir=$(abs_builddir)/../arrow-glib -ArrowGPU-1.0.gir: libarrow-gpu-glib.la -ArrowGPU_1_0_gir_PACKAGES = \ +ArrowCUDA-1.0.gir: libarrow-cuda-glib.la +ArrowCUDA_1_0_gir_PACKAGES = \ arrow-glib -ArrowGPU_1_0_gir_EXPORT_PACKAGES = \ - arrow-gpu-glib -ArrowGPU_1_0_gir_INCLUDES = \ +ArrowCUDA_1_0_gir_EXPORT_PACKAGES = \ + arrow-cuda-glib +ArrowCUDA_1_0_gir_INCLUDES = \ Arrow-1.0 -ArrowGPU_1_0_gir_CFLAGS = \ +ArrowCUDA_1_0_gir_CFLAGS = \ $(AM_CPPFLAGS) -ArrowGPU_1_0_gir_LIBS = -ArrowGPU_1_0_gir_FILES = \ - $(libarrow_gpu_glib_la_sources) -ArrowGPU_1_0_gir_SCANNERFLAGS = \ +ArrowCUDA_1_0_gir_LIBS = +ArrowCUDA_1_0_gir_FILES = \ + $(libarrow_cuda_glib_la_sources) +ArrowCUDA_1_0_gir_SCANNERFLAGS = \ --library-path=$(ARROW_LIB_DIR) \ --warn-all \ --add-include-path=$(abs_builddir)/../arrow-glib \ - --identifier-prefix=GArrowGPU \ - --symbol-prefix=garrow_gpu + --identifier-prefix=GArrowCUDA \ + --symbol-prefix=garrow_cuda if OS_MACOS -ArrowGPU_1_0_gir_LIBS += \ +ArrowCUDA_1_0_gir_LIBS += \ arrow-glib \ - arrow-gpu-glib -ArrowGPU_1_0_gir_SCANNERFLAGS += \ + arrow-cuda-glib +ArrowCUDA_1_0_gir_SCANNERFLAGS += \ --no-libtool \ --library-path=$(abs_builddir)/../arrow-glib/.libs \ --library-path=$(abs_builddir)/.libs else -ArrowGPU_1_0_gir_LIBS += \ +ArrowCUDA_1_0_gir_LIBS += \ $(abs_builddir)/../arrow-glib/libarrow-glib.la \ - libarrow-gpu-glib.la + libarrow-cuda-glib.la endif -INTROSPECTION_GIRS += ArrowGPU-1.0.gir +INTROSPECTION_GIRS += ArrowCUDA-1.0.gir girdir = $(datadir)/gir-1.0 gir_DATA = $(INTROSPECTION_GIRS) diff --git a/c_glib/arrow-gpu-glib/arrow-gpu-glib.h b/c_glib/arrow-cuda-glib/arrow-cuda-glib.h similarity index 96% rename from c_glib/arrow-gpu-glib/arrow-gpu-glib.h rename to c_glib/arrow-cuda-glib/arrow-cuda-glib.h index 1538c9a1865ac..b3c7f21087669 100644 --- a/c_glib/arrow-gpu-glib/arrow-gpu-glib.h +++ b/c_glib/arrow-cuda-glib/arrow-cuda-glib.h @@ -21,4 +21,4 @@ #include <arrow-glib/arrow-glib.h> -#include <arrow-gpu-glib/cuda.h> +#include <arrow-cuda-glib/cuda.h> diff --git a/c_glib/arrow-gpu-glib/arrow-gpu-glib.hpp b/c_glib/arrow-cuda-glib/arrow-cuda-glib.hpp similarity index 95% rename from c_glib/arrow-gpu-glib/arrow-gpu-glib.hpp rename to c_glib/arrow-cuda-glib/arrow-cuda-glib.hpp index 92017d8b67aab..e79b43ae07d15 100644 --- a/c_glib/arrow-gpu-glib/arrow-gpu-glib.hpp +++ b/c_glib/arrow-cuda-glib/arrow-cuda-glib.hpp @@ -21,4 +21,4 @@ #include <arrow-glib/arrow-glib.hpp> -#include <arrow-gpu-glib/cuda.hpp> +#include <arrow-cuda-glib/cuda.hpp> diff --git a/c_glib/arrow-gpu-glib/arrow-gpu-glib.pc.in b/c_glib/arrow-cuda-glib/arrow-cuda-glib.pc.in similarity index 85% rename from c_glib/arrow-gpu-glib/arrow-gpu-glib.pc.in rename to c_glib/arrow-cuda-glib/arrow-cuda-glib.pc.in index 38a6bae1a1298..de0ce974c7a13 100644 --- a/c_glib/arrow-gpu-glib/arrow-gpu-glib.pc.in +++
b/c_glib/arrow-cuda-glib/arrow-cuda-glib.pc.in @@ -20,9 +20,9 @@ exec_prefix=@exec_prefix@ libdir=@libdir@ includedir=@includedir@ -Name: Apache Arrow GPU GLib -Description: C API for Apache Arrow GPU based on GLib +Name: Apache Arrow CUDA GLib +Description: C API for Apache Arrow CUDA based on GLib Version: @VERSION@ -Libs: -L${libdir} -larrow-gpu-glib +Libs: -L${libdir} -larrow-cuda-glib Cflags: -I${includedir} -Requires: arrow-glib +Requires: arrow-glib arrow-cuda diff --git a/c_glib/arrow-cuda-glib/cuda.cpp b/c_glib/arrow-cuda-glib/cuda.cpp new file mode 100644 index 0000000000000..9679cc0ff7fd8 --- /dev/null +++ b/c_glib/arrow-cuda-glib/cuda.cpp @@ -0,0 +1,942 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <arrow-glib/buffer.hpp> +#include <arrow-glib/error.hpp> +#include <arrow-glib/input-stream.hpp> +#include <arrow-glib/output-stream.hpp> +#include <arrow-glib/readable.hpp> +#include <arrow-glib/record-batch.hpp> +#include <arrow-glib/schema.hpp> + +#include <arrow-cuda-glib/cuda.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: cuda + * @section_id: cuda-classes + * @title: CUDA related classes + * @include: arrow-cuda-glib/arrow-cuda-glib.h + * + * The following classes provide CUDA support for Apache Arrow data. + * + * #GArrowCUDADeviceManager is the starting point. You need at + * least one #GArrowCUDAContext to process Apache Arrow data on + * NVIDIA GPU. + * + * #GArrowCUDAContext is a class to keep context for one GPU. You + * need to create #GArrowCUDAContext for each GPU that you want to + * use. You can create #GArrowCUDAContext by + * garrow_cuda_device_manager_get_context(). + * + * #GArrowCUDABuffer is a class for data on GPU. You can copy data + * on GPU to/from CPU by garrow_cuda_buffer_copy_to_host() and + * garrow_cuda_buffer_copy_from_host(). You can share data on GPU + * with other processes by garrow_cuda_buffer_export() and + * garrow_cuda_buffer_new_ipc(). + * + * #GArrowCUDAHostBuffer is a class for data on CPU that is + * directly accessible from GPU. + * + * #GArrowCUDAIPCMemoryHandle is a class to share data on GPU with + * other processes. You can export your data on GPU to other processes + * by garrow_cuda_buffer_export() and + * garrow_cuda_ipc_memory_handle_new(). You can import other + * process data on GPU by garrow_cuda_ipc_memory_handle_new() and + * garrow_cuda_buffer_new_ipc(). + * + * #GArrowCUDABufferInputStream is a class to read data in + * #GArrowCUDABuffer. + * + * #GArrowCUDABufferOutputStream is a class to write data into + * #GArrowCUDABuffer. + */ + +G_DEFINE_TYPE(GArrowCUDADeviceManager, + garrow_cuda_device_manager, + G_TYPE_OBJECT) + +static void +garrow_cuda_device_manager_init(GArrowCUDADeviceManager *object) +{ +} + +static void +garrow_cuda_device_manager_class_init(GArrowCUDADeviceManagerClass *klass) +{ +} + +/** + * garrow_cuda_device_manager_new: + * @error: (nullable): Return location for a #GError or %NULL.
+ * + * Returns: A newly created #GArrowCUDADeviceManager on success, + * %NULL on error. + * + * Since: 0.8.0 + */ +GArrowCUDADeviceManager * +garrow_cuda_device_manager_new(GError **error) +{ + arrow::cuda::CudaDeviceManager *manager; + auto status = arrow::cuda::CudaDeviceManager::GetInstance(&manager); + if (garrow_error_check(error, status, "[cuda][device-manager][new]")) { + auto manager = g_object_new(GARROW_CUDA_TYPE_DEVICE_MANAGER, + NULL); + return GARROW_CUDA_DEVICE_MANAGER(manager); + } else { + return NULL; + } +} + +/** + * garrow_cuda_device_manager_get_context: + * @manager: A #GArrowCUDADeviceManager. + * @gpu_number: A GPU device number for the target context. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): A newly created #GArrowCUDAContext on + * success, %NULL on error. Contexts for the same GPU device number + * share the same data internally. + * + * Since: 0.8.0 + */ +GArrowCUDAContext * +garrow_cuda_device_manager_get_context(GArrowCUDADeviceManager *manager, + gint gpu_number, + GError **error) +{ + arrow::cuda::CudaDeviceManager *arrow_manager; + arrow::cuda::CudaDeviceManager::GetInstance(&arrow_manager); + std::shared_ptr<arrow::cuda::CudaContext> context; + auto status = arrow_manager->GetContext(gpu_number, &context); + if (garrow_error_check(error, status, + "[cuda][device-manager][get-context]")) { + return garrow_cuda_context_new_raw(&context); + } else { + return NULL; + } +} + +/** + * garrow_cuda_device_manager_get_n_devices: + * @manager: A #GArrowCUDADeviceManager. + * + * Returns: The number of GPU devices. + * + * Since: 0.8.0 + */ +gsize +garrow_cuda_device_manager_get_n_devices(GArrowCUDADeviceManager *manager) +{ + arrow::cuda::CudaDeviceManager *arrow_manager; + arrow::cuda::CudaDeviceManager::GetInstance(&arrow_manager); + return arrow_manager->num_devices(); +}
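For orientation, here is a minimal C sketch (not part of this patch) of how the device manager API above is meant to be driven; it assumes arrow-cuda-glib is built and installed and that at least one CUDA device is visible:

```c
/* Hypothetical standalone example, error handling condensed. */
#include <arrow-cuda-glib/arrow-cuda-glib.h>

int
main(void)
{
  GError *error = NULL;

  /* The device manager is the entry point for all CUDA resources. */
  GArrowCUDADeviceManager *manager = garrow_cuda_device_manager_new(&error);
  if (!manager) {
    g_print("failed to initialize CUDA: %s\n", error->message);
    g_error_free(error);
    return 1;
  }

  g_print("found %" G_GSIZE_FORMAT " CUDA device(s)\n",
          garrow_cuda_device_manager_get_n_devices(manager));

  /* One context per GPU; contexts for the same device number share state. */
  GArrowCUDAContext *context =
    garrow_cuda_device_manager_get_context(manager, 0, &error);
  if (context) {
    g_print("bytes allocated so far: %" G_GINT64_FORMAT "\n",
            garrow_cuda_context_get_allocated_size(context));
    g_object_unref(context);
  }
  g_object_unref(manager);
  return 0;
}
```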
+ + +typedef struct GArrowCUDAContextPrivate_ { + std::shared_ptr<arrow::cuda::CudaContext> context; +} GArrowCUDAContextPrivate; + +enum { + PROP_CONTEXT = 1 +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowCUDAContext, + garrow_cuda_context, + G_TYPE_OBJECT) + +#define GARROW_CUDA_CONTEXT_GET_PRIVATE(object) \ + static_cast<GArrowCUDAContextPrivate *>( \ + garrow_cuda_context_get_instance_private( \ + GARROW_CUDA_CONTEXT(object))) + +static void +garrow_cuda_context_finalize(GObject *object) +{ + auto priv = GARROW_CUDA_CONTEXT_GET_PRIVATE(object); + + priv->context = nullptr; + + G_OBJECT_CLASS(garrow_cuda_context_parent_class)->finalize(object); +} + +static void +garrow_cuda_context_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_CUDA_CONTEXT_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_CONTEXT: + priv->context = + *static_cast<std::shared_ptr<arrow::cuda::CudaContext> *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_cuda_context_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + switch (prop_id) { + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_cuda_context_init(GArrowCUDAContext *object) +{ +} + +static void +garrow_cuda_context_class_init(GArrowCUDAContextClass *klass) +{ + GParamSpec *spec; + + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_cuda_context_finalize; + gobject_class->set_property = garrow_cuda_context_set_property; + gobject_class->get_property = garrow_cuda_context_get_property; + + /** + * GArrowCUDAContext:context: + * + * Since: 0.8.0 + */ + spec = g_param_spec_pointer("context", + "Context", + "The raw std::shared_ptr<arrow::cuda::CudaContext>", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_CONTEXT, spec); +} + +/** + * garrow_cuda_context_get_allocated_size: + * @context: A #GArrowCUDAContext. + * + * Returns: The allocated memory by this context in bytes. + * + * Since: 0.8.0 + */ +gint64 +garrow_cuda_context_get_allocated_size(GArrowCUDAContext *context) +{ + auto arrow_context = garrow_cuda_context_get_raw(context); + return arrow_context->bytes_allocated(); +} + + +G_DEFINE_TYPE(GArrowCUDABuffer, + garrow_cuda_buffer, + GARROW_TYPE_BUFFER) + +static void +garrow_cuda_buffer_init(GArrowCUDABuffer *object) +{ +} + +static void +garrow_cuda_buffer_class_init(GArrowCUDABufferClass *klass) +{ +} + +/** + * garrow_cuda_buffer_new: + * @context: A #GArrowCUDAContext. + * @size: The number of bytes to be allocated on GPU device for this context. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): A newly created #GArrowCUDABuffer on + * success, %NULL on error. + * + * Since: 0.8.0 + */ +GArrowCUDABuffer * +garrow_cuda_buffer_new(GArrowCUDAContext *context, + gint64 size, + GError **error) +{ + auto arrow_context = garrow_cuda_context_get_raw(context); + std::shared_ptr<arrow::cuda::CudaBuffer> arrow_buffer; + auto status = arrow_context->Allocate(size, &arrow_buffer); + if (garrow_error_check(error, status, "[cuda][buffer][new]")) { + return garrow_cuda_buffer_new_raw(&arrow_buffer); + } else { + return NULL; + } +} + +/** + * garrow_cuda_buffer_new_ipc: + * @context: A #GArrowCUDAContext. + * @handle: A #GArrowCUDAIPCMemoryHandle to be communicated. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): A newly created #GArrowCUDABuffer on + * success, %NULL on error. The buffer has data from the IPC target. + * + * Since: 0.8.0 + */ +GArrowCUDABuffer * +garrow_cuda_buffer_new_ipc(GArrowCUDAContext *context, + GArrowCUDAIPCMemoryHandle *handle, + GError **error) +{ + auto arrow_context = garrow_cuda_context_get_raw(context); + auto arrow_handle = garrow_cuda_ipc_memory_handle_get_raw(handle); + std::shared_ptr<arrow::cuda::CudaBuffer> arrow_buffer; + auto status = arrow_context->OpenIpcBuffer(*arrow_handle, &arrow_buffer); + if (garrow_error_check(error, status, + "[cuda][buffer][new-ipc]")) { + return garrow_cuda_buffer_new_raw(&arrow_buffer); + } else { + return NULL; + } +} + +/** + * garrow_cuda_buffer_new_record_batch: + * @context: A #GArrowCUDAContext. + * @record_batch: A #GArrowRecordBatch to be serialized. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): A newly created #GArrowCUDABuffer on + * success, %NULL on error. The buffer has serialized record batch + * data. + * + * Since: 0.8.0 + */ +GArrowCUDABuffer * +garrow_cuda_buffer_new_record_batch(GArrowCUDAContext *context, + GArrowRecordBatch *record_batch, + GError **error) +{ + auto arrow_context = garrow_cuda_context_get_raw(context); + auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); + std::shared_ptr<arrow::cuda::CudaBuffer> arrow_buffer; + auto status = arrow::cuda::SerializeRecordBatch(*arrow_record_batch, + arrow_context.get(), + &arrow_buffer); + if (garrow_error_check(error, status, + "[cuda][buffer][new-record-batch]")) { + return garrow_cuda_buffer_new_raw(&arrow_buffer); + } else { + return NULL; + } +} + +/** + * garrow_cuda_buffer_copy_to_host: + * @buffer: A #GArrowCUDABuffer.
+ * @position: The offset of memory on GPU device to be copied. + * @size: The size of memory on GPU device to be copied in bytes. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): A #GBytes that has copied memory on CPU + * host on success, %NULL on error. + * + * Since: 0.8.0 + */ +GBytes * +garrow_cuda_buffer_copy_to_host(GArrowCUDABuffer *buffer, + gint64 position, + gint64 size, + GError **error) +{ + auto arrow_buffer = garrow_cuda_buffer_get_raw(buffer); + auto data = static_cast<guint8 *>(g_malloc(size)); + auto status = arrow_buffer->CopyToHost(position, size, data); + if (garrow_error_check(error, status, "[cuda][buffer][copy-to-host]")) { + return g_bytes_new_take(data, size); + } else { + g_free(data); + return NULL; + } +} + +/** + * garrow_cuda_buffer_copy_from_host: + * @buffer: A #GArrowCUDABuffer. + * @data: (array length=size): Data on CPU host to be copied. + * @size: The size of data on CPU host to be copied in bytes. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.8.0 + */ +gboolean +garrow_cuda_buffer_copy_from_host(GArrowCUDABuffer *buffer, + const guint8 *data, + gint64 size, + GError **error) +{ + auto arrow_buffer = garrow_cuda_buffer_get_raw(buffer); + auto status = arrow_buffer->CopyFromHost(0, data, size); + return garrow_error_check(error, + status, + "[cuda][buffer][copy-from-host]"); +}
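The two copy functions above are the basic CPU/GPU data path. A sketch of a round trip follows; it is not part of the patch, `context` is assumed to come from garrow_cuda_device_manager_get_context(), and error handling is condensed:

```c
GError *error = NULL;
GArrowCUDABuffer *buffer = garrow_cuda_buffer_new(context, 64, &error);
if (buffer) {
  const guint8 data[] = "hello, GPU";
  /* This binding always copies to device offset 0. */
  if (garrow_cuda_buffer_copy_from_host(buffer, data, sizeof(data), &error)) {
    /* Read the same region back into newly allocated host memory. */
    GBytes *bytes =
      garrow_cuda_buffer_copy_to_host(buffer, 0, sizeof(data), &error);
    if (bytes) {
      gsize size;
      g_print("read back: %s\n",
              (const gchar *)g_bytes_get_data(bytes, &size));
      g_bytes_unref(bytes);
    }
  }
  g_object_unref(buffer);
}
```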
+ +/** + * garrow_cuda_buffer_export: + * @buffer: A #GArrowCUDABuffer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): A newly created + * #GArrowCUDAIPCMemoryHandle to handle the exported buffer on + * success, %NULL on error. + * + * Since: 0.8.0 + */ +GArrowCUDAIPCMemoryHandle * +garrow_cuda_buffer_export(GArrowCUDABuffer *buffer, GError **error) +{ + auto arrow_buffer = garrow_cuda_buffer_get_raw(buffer); + std::shared_ptr<arrow::cuda::CudaIpcMemHandle> arrow_handle; + auto status = arrow_buffer->ExportForIpc(&arrow_handle); + if (garrow_error_check(error, status, "[cuda][buffer][export-for-ipc]")) { + return garrow_cuda_ipc_memory_handle_new_raw(&arrow_handle); + } else { + return NULL; + } +} + +/** + * garrow_cuda_buffer_get_context: + * @buffer: A #GArrowCUDABuffer. + * + * Returns: (transfer full): A newly created #GArrowCUDAContext for the + * buffer. Contexts for the same buffer share the same data internally. + * + * Since: 0.8.0 + */ +GArrowCUDAContext * +garrow_cuda_buffer_get_context(GArrowCUDABuffer *buffer) +{ + auto arrow_buffer = garrow_cuda_buffer_get_raw(buffer); + auto arrow_context = arrow_buffer->context(); + return garrow_cuda_context_new_raw(&arrow_context); +} + +/** + * garrow_cuda_buffer_read_record_batch: + * @buffer: A #GArrowCUDABuffer. + * @schema: A #GArrowSchema for record batch. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): A newly created #GArrowRecordBatch on + * success, %NULL on error. The record batch data is located on GPU. + * + * Since: 0.8.0 + */ +GArrowRecordBatch * +garrow_cuda_buffer_read_record_batch(GArrowCUDABuffer *buffer, + GArrowSchema *schema, + GError **error) +{ + auto arrow_buffer = garrow_cuda_buffer_get_raw(buffer); + auto arrow_schema = garrow_schema_get_raw(schema); + auto pool = arrow::default_memory_pool(); + std::shared_ptr<arrow::RecordBatch> arrow_record_batch; + auto status = arrow::cuda::ReadRecordBatch(arrow_schema, + arrow_buffer, + pool, + &arrow_record_batch); + if (garrow_error_check(error, status, + "[cuda][buffer][read-record-batch]")) { + return garrow_record_batch_new_raw(&arrow_record_batch); + } else { + return NULL; + } +}
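Taken together, garrow_cuda_buffer_new_record_batch() and garrow_cuda_buffer_read_record_batch() move a record batch into device memory and re-materialize it without copying the body back. A sketch, not part of the patch, where `record_batch`, `schema`, and `context` are assumed to already exist:

```c
GError *error = NULL;
GArrowCUDABuffer *gpu_batch =
  garrow_cuda_buffer_new_record_batch(context, record_batch, &error);
if (gpu_batch) {
  /* Re-materialize the batch; its column data stays on the GPU. */
  GArrowRecordBatch *on_gpu =
    garrow_cuda_buffer_read_record_batch(gpu_batch, schema, &error);
  if (on_gpu)
    g_object_unref(on_gpu);
  g_object_unref(gpu_batch);
}
```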
+ +G_DEFINE_TYPE(GArrowCUDAHostBuffer, + garrow_cuda_host_buffer, + GARROW_TYPE_MUTABLE_BUFFER) + +static void +garrow_cuda_host_buffer_init(GArrowCUDAHostBuffer *object) +{ +} + +static void +garrow_cuda_host_buffer_class_init(GArrowCUDAHostBufferClass *klass) +{ +} + +/** + * garrow_cuda_host_buffer_new: + * @gpu_number: A GPU device number for the target context. + * @size: The number of bytes to be allocated on CPU host. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: A newly created #GArrowCUDAHostBuffer on success, + * %NULL on error. The allocated memory is accessible from the GPU + * device specified by @gpu_number. + * + * Since: 0.8.0 + */ +GArrowCUDAHostBuffer * +garrow_cuda_host_buffer_new(gint gpu_number, gint64 size, GError **error) +{ + arrow::cuda::CudaDeviceManager *manager; + auto status = arrow::cuda::CudaDeviceManager::GetInstance(&manager); + std::shared_ptr<arrow::cuda::CudaHostBuffer> arrow_buffer; + status = manager->AllocateHost(gpu_number, size, &arrow_buffer); + if (garrow_error_check(error, status, "[cuda][host-buffer][new]")) { + return garrow_cuda_host_buffer_new_raw(&arrow_buffer); + } else { + return NULL; + } +} + + +typedef struct GArrowCUDAIPCMemoryHandlePrivate_ { + std::shared_ptr<arrow::cuda::CudaIpcMemHandle> ipc_memory_handle; +} GArrowCUDAIPCMemoryHandlePrivate; + +enum { + PROP_IPC_MEMORY_HANDLE = 1 +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowCUDAIPCMemoryHandle, + garrow_cuda_ipc_memory_handle, + G_TYPE_OBJECT) + +#define GARROW_CUDA_IPC_MEMORY_HANDLE_GET_PRIVATE(object) \ + static_cast<GArrowCUDAIPCMemoryHandlePrivate *>( \ + garrow_cuda_ipc_memory_handle_get_instance_private( \ + GARROW_CUDA_IPC_MEMORY_HANDLE(object))) + +static void +garrow_cuda_ipc_memory_handle_finalize(GObject *object) +{ + auto priv = GARROW_CUDA_IPC_MEMORY_HANDLE_GET_PRIVATE(object); + + priv->ipc_memory_handle = nullptr; + + G_OBJECT_CLASS(garrow_cuda_ipc_memory_handle_parent_class)->finalize(object); +} + +static void +garrow_cuda_ipc_memory_handle_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_CUDA_IPC_MEMORY_HANDLE_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_IPC_MEMORY_HANDLE: + priv->ipc_memory_handle = + *static_cast<std::shared_ptr<arrow::cuda::CudaIpcMemHandle> *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_cuda_ipc_memory_handle_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + switch (prop_id) { + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_cuda_ipc_memory_handle_init(GArrowCUDAIPCMemoryHandle *object) +{ +} + +static void +garrow_cuda_ipc_memory_handle_class_init(GArrowCUDAIPCMemoryHandleClass *klass) +{ + GParamSpec *spec; + + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_cuda_ipc_memory_handle_finalize; + gobject_class->set_property = garrow_cuda_ipc_memory_handle_set_property; + gobject_class->get_property = garrow_cuda_ipc_memory_handle_get_property; + + /** + * GArrowCUDAIPCMemoryHandle:ipc-memory-handle: + * + * Since: 0.8.0 + */ + spec = g_param_spec_pointer("ipc-memory-handle", + "IPC Memory Handle", + "The raw std::shared_ptr<arrow::cuda::CudaIpcMemHandle>", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_IPC_MEMORY_HANDLE, spec); +} + +/** + * garrow_cuda_ipc_memory_handle_new: + * @data: (array length=size): A serialized #GArrowCUDAIPCMemoryHandle. + * @size: The size of data. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): A newly created #GArrowCUDAIPCMemoryHandle + * on success, %NULL on error. + * + * Since: 0.8.0 + */ +GArrowCUDAIPCMemoryHandle * +garrow_cuda_ipc_memory_handle_new(const guint8 *data, + gsize size, + GError **error) +{ + std::shared_ptr<arrow::cuda::CudaIpcMemHandle> arrow_handle; + auto status = arrow::cuda::CudaIpcMemHandle::FromBuffer(data, &arrow_handle); + if (garrow_error_check(error, status, + "[cuda][ipc-memory-handle][new]")) { + return garrow_cuda_ipc_memory_handle_new_raw(&arrow_handle); + } else { + return NULL; + } +} + +/** + * garrow_cuda_ipc_memory_handle_serialize: + * @handle: A #GArrowCUDAIPCMemoryHandle. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): A newly created #GArrowBuffer on success, + * %NULL on error. The buffer has serialized @handle. The serialized + * @handle can be deserialized by garrow_cuda_ipc_memory_handle_new() + * in another process. + * + * Since: 0.8.0 + */ +GArrowBuffer * +garrow_cuda_ipc_memory_handle_serialize(GArrowCUDAIPCMemoryHandle *handle, + GError **error) +{ + auto arrow_handle = garrow_cuda_ipc_memory_handle_get_raw(handle); + std::shared_ptr<arrow::Buffer> arrow_buffer; + auto status = arrow_handle->Serialize(arrow::default_memory_pool(), + &arrow_buffer); + if (garrow_error_check(error, status, + "[cuda][ipc-memory-handle][serialize]")) { + return garrow_buffer_new_raw(&arrow_buffer); + } else { + return NULL; + } +}
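The IPC pieces compose as in the following sketch; it is not part of the patch, `buffer` and `context` are assumed to exist, and the transport of the serialized handle between the two hypothetical processes is elided along with error handling:

```c
/* Producer process: */
GError *error = NULL;
GArrowCUDAIPCMemoryHandle *handle =
  garrow_cuda_buffer_export(buffer, &error);
GArrowBuffer *serialized =
  garrow_cuda_ipc_memory_handle_serialize(handle, &error);
/* ...send the contents of `serialized` over a socket, pipe, etc.... */

/* Consumer process, with the received bytes in `data` and `size`: */
GArrowCUDAIPCMemoryHandle *received =
  garrow_cuda_ipc_memory_handle_new(data, size, &error);
GArrowCUDABuffer *shared =
  garrow_cuda_buffer_new_ipc(context, received, &error);
/* `shared` now aliases the producer's device memory without any copy. */
```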
+ +GArrowBuffer * +garrow_cuda_buffer_input_stream_new_raw_readable_interface(std::shared_ptr<arrow::Buffer> *arrow_buffer) +{ + auto buffer = GARROW_BUFFER(g_object_new(GARROW_CUDA_TYPE_BUFFER, + "buffer", arrow_buffer, + NULL)); + return buffer; +} + +static std::shared_ptr<arrow::io::Readable> +garrow_cuda_buffer_input_stream_get_raw_readable_interface(GArrowReadable *readable) +{ + auto input_stream = GARROW_INPUT_STREAM(readable); + auto arrow_input_stream = garrow_input_stream_get_raw(input_stream); + return arrow_input_stream; +} + +static void +garrow_cuda_buffer_input_stream_readable_interface_init(GArrowReadableInterface *iface) +{ + iface->new_raw = + garrow_cuda_buffer_input_stream_new_raw_readable_interface; + iface->get_raw = + garrow_cuda_buffer_input_stream_get_raw_readable_interface; +} + +G_DEFINE_TYPE_WITH_CODE( + GArrowCUDABufferInputStream, + garrow_cuda_buffer_input_stream, + GARROW_TYPE_BUFFER_INPUT_STREAM, + G_IMPLEMENT_INTERFACE( + GARROW_TYPE_READABLE, + garrow_cuda_buffer_input_stream_readable_interface_init)) + +static void +garrow_cuda_buffer_input_stream_init(GArrowCUDABufferInputStream *object) +{ +} + +static void +garrow_cuda_buffer_input_stream_class_init(GArrowCUDABufferInputStreamClass *klass) +{ +} + +/** + * garrow_cuda_buffer_input_stream_new: + * @buffer: A #GArrowCUDABuffer. + * + * Returns: (transfer full): A newly created + * #GArrowCUDABufferInputStream. + * + * Since: 0.8.0 + */ +GArrowCUDABufferInputStream * +garrow_cuda_buffer_input_stream_new(GArrowCUDABuffer *buffer) +{ + auto arrow_buffer = garrow_cuda_buffer_get_raw(buffer); + auto arrow_reader = + std::make_shared<arrow::cuda::CudaBufferReader>(arrow_buffer); + return garrow_cuda_buffer_input_stream_new_raw(&arrow_reader); +} + + +G_DEFINE_TYPE(GArrowCUDABufferOutputStream, + garrow_cuda_buffer_output_stream, + GARROW_TYPE_OUTPUT_STREAM) + +static void +garrow_cuda_buffer_output_stream_init(GArrowCUDABufferOutputStream *object) +{ +} + +static void +garrow_cuda_buffer_output_stream_class_init(GArrowCUDABufferOutputStreamClass *klass) +{ +} + +/** + * garrow_cuda_buffer_output_stream_new: + * @buffer: A #GArrowCUDABuffer. + * + * Returns: (transfer full): A newly created + * #GArrowCUDABufferOutputStream. + * + * Since: 0.8.0 + */ +GArrowCUDABufferOutputStream * +garrow_cuda_buffer_output_stream_new(GArrowCUDABuffer *buffer) +{ + auto arrow_buffer = garrow_cuda_buffer_get_raw(buffer); + auto arrow_writer = + std::make_shared<arrow::cuda::CudaBufferWriter>(arrow_buffer); + return garrow_cuda_buffer_output_stream_new_raw(&arrow_writer); +} + +/** + * garrow_cuda_buffer_output_stream_set_buffer_size: + * @stream: A #GArrowCUDABufferOutputStream. + * @size: A size of CPU buffer in bytes. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Sets CPU buffer size to limit `cudaMemcpy()` calls. If CPU buffer + * size is `0`, buffering is disabled. + * + * The default is `0`. + * + * Since: 0.8.0 + */ +gboolean +garrow_cuda_buffer_output_stream_set_buffer_size(GArrowCUDABufferOutputStream *stream, + gint64 size, + GError **error) +{ + auto arrow_stream = garrow_cuda_buffer_output_stream_get_raw(stream); + auto status = arrow_stream->SetBufferSize(size); + return garrow_error_check(error, + status, + "[cuda][buffer-output-stream][set-buffer-size]"); +} + +/** + * garrow_cuda_buffer_output_stream_get_buffer_size: + * @stream: A #GArrowCUDABufferOutputStream. + * + * Returns: The CPU buffer size in bytes. + * + * See garrow_cuda_buffer_output_stream_set_buffer_size() for CPU + * buffer size details. + * + * Since: 0.8.0 + */ +gint64 +garrow_cuda_buffer_output_stream_get_buffer_size(GArrowCUDABufferOutputStream *stream) +{ + auto arrow_stream = garrow_cuda_buffer_output_stream_get_raw(stream); + return arrow_stream->buffer_size(); +} + +/** + * garrow_cuda_buffer_output_stream_get_buffered_size: + * @stream: A #GArrowCUDABufferOutputStream. + * + * Returns: The size of buffered data in bytes. + * + * Since: 0.8.0 + */ +gint64 +garrow_cuda_buffer_output_stream_get_buffered_size(GArrowCUDABufferOutputStream *stream) +{ + auto arrow_stream = garrow_cuda_buffer_output_stream_get_raw(stream); + return arrow_stream->num_bytes_buffered(); +}
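A sketch of the buffering knob described above; not part of the patch, and `buffer` is assumed to be an existing #GArrowCUDABuffer:

```c
GError *error = NULL;
GArrowCUDABufferOutputStream *stream =
  garrow_cuda_buffer_output_stream_new(buffer);
/* Accumulate writes in a 4 KiB CPU-side buffer so many small writes
 * become one cudaMemcpy() instead of one copy per write. */
garrow_cuda_buffer_output_stream_set_buffer_size(stream, 4096, &error);
/* ...write through the regular GArrowOutputStream API here... */
g_print("still buffered on the CPU: %" G_GINT64_FORMAT " bytes\n",
        garrow_cuda_buffer_output_stream_get_buffered_size(stream));
g_object_unref(stream);
```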
+ + +G_END_DECLS + +GArrowCUDAContext * +garrow_cuda_context_new_raw(std::shared_ptr<arrow::cuda::CudaContext> *arrow_context) +{ + return GARROW_CUDA_CONTEXT(g_object_new(GARROW_CUDA_TYPE_CONTEXT, + "context", arrow_context, + NULL)); +} + +std::shared_ptr<arrow::cuda::CudaContext> +garrow_cuda_context_get_raw(GArrowCUDAContext *context) +{ + if (!context) + return nullptr; + + auto priv = GARROW_CUDA_CONTEXT_GET_PRIVATE(context); + return priv->context; +} + +GArrowCUDAIPCMemoryHandle * +garrow_cuda_ipc_memory_handle_new_raw(std::shared_ptr<arrow::cuda::CudaIpcMemHandle> *arrow_handle) +{ + auto handle = g_object_new(GARROW_CUDA_TYPE_IPC_MEMORY_HANDLE, + "ipc-memory-handle", arrow_handle, + NULL); + return GARROW_CUDA_IPC_MEMORY_HANDLE(handle); +} + +std::shared_ptr<arrow::cuda::CudaIpcMemHandle> +garrow_cuda_ipc_memory_handle_get_raw(GArrowCUDAIPCMemoryHandle *handle) +{ + if (!handle) + return nullptr; + + auto priv = GARROW_CUDA_IPC_MEMORY_HANDLE_GET_PRIVATE(handle); + return priv->ipc_memory_handle; +} + +GArrowCUDABuffer * +garrow_cuda_buffer_new_raw(std::shared_ptr<arrow::cuda::CudaBuffer> *arrow_buffer) +{ + return GARROW_CUDA_BUFFER(g_object_new(GARROW_CUDA_TYPE_BUFFER, + "buffer", arrow_buffer, + NULL)); +} + +std::shared_ptr<arrow::cuda::CudaBuffer> +garrow_cuda_buffer_get_raw(GArrowCUDABuffer *buffer) +{ + if (!buffer) + return nullptr; + + auto arrow_buffer = garrow_buffer_get_raw(GARROW_BUFFER(buffer)); + return std::static_pointer_cast<arrow::cuda::CudaBuffer>(arrow_buffer); +} + +GArrowCUDAHostBuffer * +garrow_cuda_host_buffer_new_raw(std::shared_ptr<arrow::cuda::CudaHostBuffer> *arrow_buffer) +{ + auto buffer = g_object_new(GARROW_CUDA_TYPE_HOST_BUFFER, + "buffer", arrow_buffer, + NULL); + return GARROW_CUDA_HOST_BUFFER(buffer); +} + +std::shared_ptr<arrow::cuda::CudaHostBuffer> +garrow_cuda_host_buffer_get_raw(GArrowCUDAHostBuffer *buffer) +{ + if (!buffer) + return nullptr; + + auto arrow_buffer = garrow_buffer_get_raw(GARROW_BUFFER(buffer)); + return std::static_pointer_cast<arrow::cuda::CudaHostBuffer>(arrow_buffer); +} + +GArrowCUDABufferInputStream * +garrow_cuda_buffer_input_stream_new_raw(std::shared_ptr<arrow::cuda::CudaBufferReader> *arrow_reader) +{ + auto input_stream = g_object_new(GARROW_CUDA_TYPE_BUFFER_INPUT_STREAM, + "input-stream", arrow_reader, + NULL); + return GARROW_CUDA_BUFFER_INPUT_STREAM(input_stream); +} + +std::shared_ptr<arrow::cuda::CudaBufferReader> +garrow_cuda_buffer_input_stream_get_raw(GArrowCUDABufferInputStream *input_stream) +{ + if (!input_stream) + return nullptr; + + auto arrow_reader = + garrow_input_stream_get_raw(GARROW_INPUT_STREAM(input_stream)); + return std::static_pointer_cast<arrow::cuda::CudaBufferReader>(arrow_reader); +} + +GArrowCUDABufferOutputStream * +garrow_cuda_buffer_output_stream_new_raw(std::shared_ptr<arrow::cuda::CudaBufferWriter> *arrow_writer) +{ + auto output_stream = g_object_new(GARROW_CUDA_TYPE_BUFFER_OUTPUT_STREAM, + "output-stream", arrow_writer, + NULL); + return GARROW_CUDA_BUFFER_OUTPUT_STREAM(output_stream); +} + +std::shared_ptr<arrow::cuda::CudaBufferWriter> +garrow_cuda_buffer_output_stream_get_raw(GArrowCUDABufferOutputStream *output_stream) +{ + if (!output_stream) + return nullptr; + + auto arrow_writer = + garrow_output_stream_get_raw(GARROW_OUTPUT_STREAM(output_stream)); + return std::static_pointer_cast<arrow::cuda::CudaBufferWriter>(arrow_writer); +} diff --git a/c_glib/arrow-cuda-glib/cuda.h b/c_glib/arrow-cuda-glib/cuda.h new file mode 100644 index 0000000000000..6cdef99221fe2 --- /dev/null +++ b/c_glib/arrow-cuda-glib/cuda.h @@ -0,0 +1,182 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/arrow-glib.h> + +G_BEGIN_DECLS + +#define GARROW_CUDA_TYPE_DEVICE_MANAGER (garrow_cuda_device_manager_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowCUDADeviceManager, + garrow_cuda_device_manager, + GARROW_CUDA, + DEVICE_MANAGER, + GObject) +struct _GArrowCUDADeviceManagerClass +{ + GObjectClass parent_class; +}; + +#define GARROW_CUDA_TYPE_CONTEXT (garrow_cuda_context_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowCUDAContext, + garrow_cuda_context, + GARROW_CUDA, + CONTEXT, + GObject) +struct _GArrowCUDAContextClass +{ + GObjectClass parent_class; +}; + +#define GARROW_CUDA_TYPE_BUFFER (garrow_cuda_buffer_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowCUDABuffer, + garrow_cuda_buffer, + GARROW_CUDA, + BUFFER, + GArrowBuffer) +struct _GArrowCUDABufferClass +{ + GArrowBufferClass parent_class; +}; + +#define GARROW_CUDA_TYPE_HOST_BUFFER (garrow_cuda_host_buffer_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowCUDAHostBuffer, + garrow_cuda_host_buffer, + GARROW_CUDA, + HOST_BUFFER, + GArrowMutableBuffer) +struct _GArrowCUDAHostBufferClass +{ + GArrowMutableBufferClass parent_class; +}; + +#define GARROW_CUDA_TYPE_IPC_MEMORY_HANDLE \ + (garrow_cuda_ipc_memory_handle_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowCUDAIPCMemoryHandle, + garrow_cuda_ipc_memory_handle, + GARROW_CUDA, + IPC_MEMORY_HANDLE, + GObject) +struct _GArrowCUDAIPCMemoryHandleClass +{ + GObjectClass parent_class; +}; + +#define GARROW_CUDA_TYPE_BUFFER_INPUT_STREAM \ + (garrow_cuda_buffer_input_stream_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowCUDABufferInputStream, + garrow_cuda_buffer_input_stream, + GARROW_CUDA, + BUFFER_INPUT_STREAM, + GArrowBufferInputStream) +struct _GArrowCUDABufferInputStreamClass +{ + GArrowBufferInputStreamClass parent_class; +}; + +#define GARROW_CUDA_TYPE_BUFFER_OUTPUT_STREAM \ + (garrow_cuda_buffer_output_stream_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowCUDABufferOutputStream, + garrow_cuda_buffer_output_stream, + GARROW_CUDA, + BUFFER_OUTPUT_STREAM, + GArrowOutputStream) +struct _GArrowCUDABufferOutputStreamClass +{ + GArrowOutputStreamClass parent_class; +}; + +GArrowCUDADeviceManager * +garrow_cuda_device_manager_new(GError **error); + +GArrowCUDAContext * +garrow_cuda_device_manager_get_context(GArrowCUDADeviceManager *manager, + gint gpu_number, + GError **error); +gsize +garrow_cuda_device_manager_get_n_devices(GArrowCUDADeviceManager *manager); + +gint64 +garrow_cuda_context_get_allocated_size(GArrowCUDAContext *context); + + +GArrowCUDABuffer * +garrow_cuda_buffer_new(GArrowCUDAContext *context, + gint64 size, + GError **error); +GArrowCUDABuffer * +garrow_cuda_buffer_new_ipc(GArrowCUDAContext *context, + GArrowCUDAIPCMemoryHandle *handle, + GError **error); +GArrowCUDABuffer * +garrow_cuda_buffer_new_record_batch(GArrowCUDAContext *context, + GArrowRecordBatch *record_batch, + GError
**error); +GBytes * +garrow_cuda_buffer_copy_to_host(GArrowCUDABuffer *buffer, + gint64 position, + gint64 size, + GError **error); +gboolean +garrow_cuda_buffer_copy_from_host(GArrowCUDABuffer *buffer, + const guint8 *data, + gint64 size, + GError **error); +GArrowCUDAIPCMemoryHandle * +garrow_cuda_buffer_export(GArrowCUDABuffer *buffer, + GError **error); +GArrowCUDAContext * +garrow_cuda_buffer_get_context(GArrowCUDABuffer *buffer); +GArrowRecordBatch * +garrow_cuda_buffer_read_record_batch(GArrowCUDABuffer *buffer, + GArrowSchema *schema, + GError **error); + + +GArrowCUDAHostBuffer * +garrow_cuda_host_buffer_new(gint gpu_number, + gint64 size, + GError **error); + +GArrowCUDAIPCMemoryHandle * +garrow_cuda_ipc_memory_handle_new(const guint8 *data, + gsize size, + GError **error); + +GArrowBuffer * +garrow_cuda_ipc_memory_handle_serialize(GArrowCUDAIPCMemoryHandle *handle, + GError **error); + +GArrowCUDABufferInputStream * +garrow_cuda_buffer_input_stream_new(GArrowCUDABuffer *buffer); + +GArrowCUDABufferOutputStream * +garrow_cuda_buffer_output_stream_new(GArrowCUDABuffer *buffer); + +gboolean +garrow_cuda_buffer_output_stream_set_buffer_size(GArrowCUDABufferOutputStream *stream, + gint64 size, + GError **error); +gint64 +garrow_cuda_buffer_output_stream_get_buffer_size(GArrowCUDABufferOutputStream *stream); +gint64 +garrow_cuda_buffer_output_stream_get_buffered_size(GArrowCUDABufferOutputStream *stream); + +G_END_DECLS diff --git a/c_glib/arrow-cuda-glib/cuda.hpp b/c_glib/arrow-cuda-glib/cuda.hpp new file mode 100644 index 0000000000000..0f8985a9de4f5 --- /dev/null +++ b/c_glib/arrow-cuda-glib/cuda.hpp @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow/gpu/cuda_api.h> + +#include <arrow-cuda-glib/cuda.h> + +GArrowCUDAContext * +garrow_cuda_context_new_raw(std::shared_ptr<arrow::cuda::CudaContext> *arrow_context); +std::shared_ptr<arrow::cuda::CudaContext> +garrow_cuda_context_get_raw(GArrowCUDAContext *context); + +GArrowCUDAIPCMemoryHandle * +garrow_cuda_ipc_memory_handle_new_raw(std::shared_ptr<arrow::cuda::CudaIpcMemHandle> *arrow_handle); +std::shared_ptr<arrow::cuda::CudaIpcMemHandle> +garrow_cuda_ipc_memory_handle_get_raw(GArrowCUDAIPCMemoryHandle *handle); + +GArrowCUDABuffer * +garrow_cuda_buffer_new_raw(std::shared_ptr<arrow::cuda::CudaBuffer> *arrow_buffer); +std::shared_ptr<arrow::cuda::CudaBuffer> +garrow_cuda_buffer_get_raw(GArrowCUDABuffer *buffer); + +GArrowCUDAHostBuffer * +garrow_cuda_host_buffer_new_raw(std::shared_ptr<arrow::cuda::CudaHostBuffer> *arrow_buffer); +std::shared_ptr<arrow::cuda::CudaHostBuffer> +garrow_cuda_host_buffer_get_raw(GArrowCUDAHostBuffer *buffer); + +GArrowCUDABufferInputStream * +garrow_cuda_buffer_input_stream_new_raw(std::shared_ptr<arrow::cuda::CudaBufferReader> *arrow_reader); +std::shared_ptr<arrow::cuda::CudaBufferReader> +garrow_cuda_buffer_input_stream_get_raw(GArrowCUDABufferInputStream *input_stream); + +GArrowCUDABufferOutputStream * +garrow_cuda_buffer_output_stream_new_raw(std::shared_ptr<arrow::cuda::CudaBufferWriter> *arrow_writer); +std::shared_ptr<arrow::cuda::CudaBufferWriter> +garrow_cuda_buffer_output_stream_get_raw(GArrowCUDABufferOutputStream *output_stream); diff --git a/c_glib/arrow-cuda-glib/meson.build b/c_glib/arrow-cuda-glib/meson.build new file mode 100644 index 0000000000000..e5b9f477fc142 --- /dev/null +++ b/c_glib/arrow-cuda-glib/meson.build @@ -0,0 +1,79 @@ +# -*- indent-tabs-mode: nil -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
+ +sources = files( + 'cuda.cpp', +) + +c_headers = files( + 'arrow-cuda-glib.h', + 'cuda.h', +) + +cpp_headers = files( + 'arrow-cuda-glib.hpp', + 'cuda.hpp', +) + +headers = c_headers + cpp_headers +install_headers(headers, subdir: 'arrow-cuda-glib') + + +dependencies = [ + arrow_cuda, + arrow_glib, +] +libarrow_cuda_glib = library('arrow-cuda-glib', + sources: sources, + install: true, + dependencies: dependencies, + include_directories: base_include_directories, + soversion: so_version, + version: library_version) +arrow_cuda_glib = declare_dependency(link_with: libarrow_cuda_glib, + include_directories: base_include_directories, + dependencies: dependencies) + +pkgconfig.generate(filebase: 'arrow-cuda-glib', + name: 'Apache Arrow CUDA GLib', + description: 'C API for Apache Arrow CUDA based on GLib', + version: version, + requires: ['arrow-glib', 'arrow-cuda'], + libraries: [libarrow_cuda_glib]) + +gir_dependencies = [ + declare_dependency(sources: arrow_glib_gir), +] +gir_extra_args = [ + '--warn-all', + '--include-uninstalled=./arrow-glib/Arrow-1.0.gir', +] +arrow_cuda_glib_gir = gnome.generate_gir(libarrow_cuda_glib, + dependencies: gir_dependencies, + sources: sources + c_headers, + namespace: 'ArrowCUDA', + nsversion: api_version, + identifier_prefix: 'GArrowCUDA', + symbol_prefix: 'garrow_cuda', + export_packages: 'arrow-cuda-glib', + includes: [ + 'Arrow-1.0', + ], + install: true, + extra_args: gir_extra_args) diff --git a/c_glib/arrow-glib/Makefile.am b/c_glib/arrow-glib/Makefile.am index bf97168eb81d7..a296595571438 100644 --- a/c_glib/arrow-glib/Makefile.am +++ b/c_glib/arrow-glib/Makefile.am @@ -59,7 +59,7 @@ libarrow_glib_la_headers = \ composite-array.h \ composite-data-type.h \ data-type.h \ - decimal.h \ + decimal128.h \ error.h \ field.h \ gobject-type.h \ @@ -110,7 +110,7 @@ libarrow_glib_la_sources = \ column.cpp \ composite-array.cpp \ composite-data-type.cpp \ - decimal.cpp \ + decimal128.cpp \ error.cpp \ field.cpp \ record-batch.cpp \ @@ -155,7 +155,7 @@ libarrow_glib_la_cpp_headers = \ codec.hpp \ column.hpp \ data-type.hpp \ - decimal.hpp \ + decimal128.hpp \ error.hpp \ field.hpp \ record-batch.hpp \ diff --git a/c_glib/arrow-glib/array-builder.cpp b/c_glib/arrow-glib/array-builder.cpp index a5c75790de939..095c68d87689d 100644 --- a/c_glib/arrow-glib/array-builder.cpp +++ b/c_glib/arrow-glib/array-builder.cpp @@ -23,16 +23,16 @@ #include <arrow-glib/array-builder.hpp> #include <arrow-glib/data-type.hpp> +#include <arrow-glib/decimal128.hpp> #include <arrow-glib/error.hpp> #include <arrow-glib/type.hpp> -#include <arrow-glib/decimal.hpp> template <typename BUILDER, typename VALUE> gboolean -garrow_array_builder_append(GArrowArrayBuilder *builder, - VALUE value, - GError **error, - const gchar *context) +garrow_array_builder_append_value(GArrowArrayBuilder *builder, + VALUE value, + GError **error, + const gchar *context) { auto arrow_builder = static_cast<BUILDER>(garrow_array_builder_get_raw(builder)); @@ -446,17 +446,38 @@ garrow_boolean_array_builder_new(void) * @error: (nullable): Return location for a #GError or %NULL. * * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 0.12.0: + * Use garrow_boolean_array_builder_append_value() instead. */ gboolean garrow_boolean_array_builder_append(GArrowBooleanArrayBuilder *builder, gboolean value, GError **error) { - return garrow_array_builder_append + return garrow_boolean_array_builder_append_value(builder, value, error); +}
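The rest of this file repeats one mechanical pattern: each `*_append()` becomes a thin deprecated wrapper around a new `*_append_value()` with identical behavior. For callers, the migration looks like the following sketch (not part of the patch; the Int32 builder stands in for any of the builders below):

```c
GError *error = NULL;
GArrowInt32ArrayBuilder *builder = garrow_int32_array_builder_new();

/* Before 0.12.0 (still compiles, but is now deprecated): */
garrow_int32_array_builder_append(builder, 29, &error);

/* From 0.12.0 on: same behavior, new name. */
garrow_int32_array_builder_append_value(builder, 29, &error);

g_object_unref(builder);
```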
+ +/** + * garrow_boolean_array_builder_append_value: + * @builder: A #GArrowBooleanArrayBuilder. + * @value: A boolean value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_boolean_array_builder_append_value(GArrowBooleanArrayBuilder *builder, + gboolean value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), static_cast<bool>(value), error, - "[boolean-array-builder][append]"); + "[boolean-array-builder][append-value]"); } /** @@ -583,17 +604,38 @@ garrow_int_array_builder_new(void) * Returns: %TRUE on success, %FALSE if there was an error. * * Since: 0.6.0 + * + * Deprecated: 0.12.0: + * Use garrow_int_array_builder_append_value() instead. */ gboolean garrow_int_array_builder_append(GArrowIntArrayBuilder *builder, gint64 value, GError **error) { - return garrow_array_builder_append + return garrow_int_array_builder_append_value(builder, value, error); +} + +/** + * garrow_int_array_builder_append_value: + * @builder: A #GArrowIntArrayBuilder. + * @value: An int value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_int_array_builder_append_value(GArrowIntArrayBuilder *builder, + gint64 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[int-array-builder][append]"); + "[int-array-builder][append-value]"); } /** @@ -718,17 +760,38 @@ garrow_uint_array_builder_new(void) * Returns: %TRUE on success, %FALSE if there was an error. * * Since: 0.8.0 + * + * Deprecated: 0.12.0: + * Use garrow_uint_array_builder_append_value() instead. */ gboolean garrow_uint_array_builder_append(GArrowUIntArrayBuilder *builder, guint64 value, GError **error) { - return garrow_array_builder_append + return garrow_uint_array_builder_append_value(builder, value, error); +} + +/** + * garrow_uint_array_builder_append_value: + * @builder: A #GArrowUIntArrayBuilder. + * @value: An unsigned int value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_uint_array_builder_append_value(GArrowUIntArrayBuilder *builder, + guint64 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[uint-array-builder][append]"); + "[uint-array-builder][append-value]"); } /** @@ -848,17 +911,38 @@ garrow_int8_array_builder_new(void) * @error: (nullable): Return location for a #GError or %NULL. * * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 0.12.0: + * Use garrow_int8_array_builder_append_value() instead. */ gboolean garrow_int8_array_builder_append(GArrowInt8ArrayBuilder *builder, gint8 value, GError **error) { - return garrow_array_builder_append + return garrow_int8_array_builder_append_value(builder, value, error); +} + +/** + * garrow_int8_array_builder_append_value: + * @builder: A #GArrowInt8ArrayBuilder. + * @value: An int8 value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_int8_array_builder_append_value(GArrowInt8ArrayBuilder *builder, + gint8 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[int8-array-builder][append]"); + "[int8-array-builder][append-value]"); } /** @@ -976,17 +1060,38 @@ garrow_uint8_array_builder_new(void) * @error: (nullable): Return location for a #GError or %NULL.
* * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 0.12.0: + * Use garrow_uint8_array_builder_append_value() instead. */ gboolean garrow_uint8_array_builder_append(GArrowUInt8ArrayBuilder *builder, guint8 value, GError **error) { - return garrow_array_builder_append + return garrow_uint8_array_builder_append_value(builder, value, error); +} + +/** + * garrow_uint8_array_builder_append_value: + * @builder: A #GArrowUInt8ArrayBuilder. + * @value: An uint8 value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_uint8_array_builder_append_value(GArrowUInt8ArrayBuilder *builder, + guint8 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[uint8-array-builder][append]"); + "[uint8-array-builder][append-value]"); } /** @@ -1104,17 +1209,38 @@ garrow_int16_array_builder_new(void) * @error: (nullable): Return location for a #GError or %NULL. * * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 0.12.0: + * Use garrow_int16_array_builder_append_value() instead. */ gboolean garrow_int16_array_builder_append(GArrowInt16ArrayBuilder *builder, gint16 value, GError **error) { - return garrow_array_builder_append + return garrow_int16_array_builder_append_value(builder, value, error); +} + +/** + * garrow_int16_array_builder_append_value: + * @builder: A #GArrowInt16ArrayBuilder. + * @value: A int16 value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_int16_array_builder_append_value(GArrowInt16ArrayBuilder *builder, + gint16 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[int16-array-builder][append]"); + "[int16-array-builder][append-value]"); } /** @@ -1232,17 +1358,38 @@ garrow_uint16_array_builder_new(void) * @error: (nullable): Return location for a #GError or %NULL. * * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 0.12.0: + * Use garrow_uint16_array_builder_append_value() instead. */ gboolean garrow_uint16_array_builder_append(GArrowUInt16ArrayBuilder *builder, guint16 value, GError **error) { - return garrow_array_builder_append + return garrow_uint16_array_builder_append_value(builder, value, error); +} + +/** + * garrow_uint16_array_builder_append_value: + * @builder: A #GArrowUInt16ArrayBuilder. + * @value: An uint16 value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_uint16_array_builder_append_value(GArrowUInt16ArrayBuilder *builder, + guint16 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[uint16-array-builder][append]"); + "[uint16-array-builder][append-value]"); } /** @@ -1360,17 +1507,38 @@ garrow_int32_array_builder_new(void) * @error: (nullable): Return location for a #GError or %NULL. * * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 0.12.0: + * Use garrow_int32_array_builder_append_value() instead. 
*/ gboolean garrow_int32_array_builder_append(GArrowInt32ArrayBuilder *builder, gint32 value, GError **error) { - return garrow_array_builder_append + return garrow_int32_array_builder_append_value(builder, value, error); +} + +/** + * garrow_int32_array_builder_append_value: + * @builder: A #GArrowInt32ArrayBuilder. + * @value: A int32 value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_int32_array_builder_append_value(GArrowInt32ArrayBuilder *builder, + gint32 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[int32-array-builder][append]"); + "[int32-array-builder][append-value]"); } /** @@ -1488,17 +1656,38 @@ garrow_uint32_array_builder_new(void) * @error: (nullable): Return location for a #GError or %NULL. * * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 0.12.0: + * Use garrow_uint32_array_builder_append_value() instead. */ gboolean garrow_uint32_array_builder_append(GArrowUInt32ArrayBuilder *builder, guint32 value, GError **error) { - return garrow_array_builder_append + return garrow_uint32_array_builder_append_value(builder, value, error); +} + +/** + * garrow_uint32_array_builder_append_value: + * @builder: A #GArrowUInt32ArrayBuilder. + * @value: An uint32 value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_uint32_array_builder_append_value(GArrowUInt32ArrayBuilder *builder, + guint32 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[uint32-array-builder][append]"); + "[uint32-array-builder][append-value]"); } /** @@ -1616,17 +1805,38 @@ garrow_int64_array_builder_new(void) * @error: (nullable): Return location for a #GError or %NULL. * * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 0.12.0: + * Use garrow_int64_array_builder_append_value() instead. */ gboolean garrow_int64_array_builder_append(GArrowInt64ArrayBuilder *builder, gint64 value, GError **error) { - return garrow_array_builder_append + return garrow_int64_array_builder_append_value(builder, value, error); +} + +/** + * garrow_int64_array_builder_append_value: + * @builder: A #GArrowInt64ArrayBuilder. + * @value: A int64 value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_int64_array_builder_append_value(GArrowInt64ArrayBuilder *builder, + gint64 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[int64-array-builder][append]"); + "[int64-array-builder][append-value]"); } /** @@ -1744,17 +1954,38 @@ garrow_uint64_array_builder_new(void) * @error: (nullable): Return location for a #GError or %NULL. * * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 0.12.0: + * Use garrow_uint64_array_builder_append_value() instead. */ gboolean garrow_uint64_array_builder_append(GArrowUInt64ArrayBuilder *builder, guint64 value, GError **error) { - return garrow_array_builder_append + return garrow_uint64_array_builder_append_value(builder, value, error); +} + +/** + * garrow_uint64_array_builder_append_value: + * @builder: A #GArrowUInt64ArrayBuilder. 
+ * @value: A uint64 value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_uint64_array_builder_append_value(GArrowUInt64ArrayBuilder *builder, + guint64 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[uint64-array-builder][append]"); + "[uint64-array-builder][append-value]"); } /** @@ -1872,17 +2103,38 @@ garrow_float_array_builder_new(void) * @error: (nullable): Return location for a #GError or %NULL. * * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 0.12.0: + * Use garrow_float_array_builder_append_value() instead. */ gboolean garrow_float_array_builder_append(GArrowFloatArrayBuilder *builder, gfloat value, GError **error) { - return garrow_array_builder_append + return garrow_float_array_builder_append_value(builder, value, error); +} + +/** + * garrow_float_array_builder_append_value: + * @builder: A #GArrowFloatArrayBuilder. + * @value: A float value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_float_array_builder_append_value(GArrowFloatArrayBuilder *builder, + gfloat value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[float-array-builder][append]"); + "[float-array-builder][append-value]"); } /** @@ -2000,17 +2252,38 @@ garrow_double_array_builder_new(void) * @error: (nullable): Return location for a #GError or %NULL. * * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 0.12.0: + * Use garrow_double_array_builder_append_value() instead. */ gboolean garrow_double_array_builder_append(GArrowDoubleArrayBuilder *builder, gdouble value, GError **error) { - return garrow_array_builder_append + return garrow_double_array_builder_append_value(builder, value, error); +} + +/** + * garrow_double_array_builder_append_value: + * @builder: A #GArrowDoubleArrayBuilder. + * @value: A double value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_double_array_builder_append_value(GArrowDoubleArrayBuilder *builder, + gdouble value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[double-array-builder][append]"); + "[double-array-builder][append-value]"); } /** @@ -2129,19 +2402,44 @@ garrow_binary_array_builder_new(void) * @error: (nullable): Return location for a #GError or %NULL. * * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 0.12.0: + * Use garrow_binary_array_builder_append_value() instead. */ gboolean garrow_binary_array_builder_append(GArrowBinaryArrayBuilder *builder, const guint8 *value, gint32 length, GError **error) +{ + return garrow_binary_array_builder_append_value(builder, value, length, error); +} + +/** + * garrow_binary_array_builder_append_value: + * @builder: A #GArrowBinaryArrayBuilder. + * @value: (array length=length): A binary value. + * @length: A value length. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error.
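+ *
+ * An illustrative sketch (editor's example, not part of this patch;
+ * `builder` and `error` are assumed to be set up elsewhere):
+ * |[
+ * const guint8 data[] = {0x01, 0x02, 0x03};
+ * garrow_binary_array_builder_append_value(builder, data, 3, &error);
+ * ]|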
+ * + * Since: 0.12.0 + */ +gboolean +garrow_binary_array_builder_append_value(GArrowBinaryArrayBuilder *builder, + const guint8 *value, + gint32 length, + GError **error) { auto arrow_builder = static_cast<arrow::BinaryBuilder *>( garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); auto status = arrow_builder->Append(value, length); - return garrow_error_check(error, status, "[binary-array-builder][append]"); + return garrow_error_check(error, + status, + "[binary-array-builder][append-value]"); } /** @@ -2197,11 +2495,32 @@ garrow_string_array_builder_new(void) * @error: (nullable): Return location for a #GError or %NULL. * * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 0.12.0: + * Use garrow_string_array_builder_append_value() instead. */ gboolean garrow_string_array_builder_append(GArrowStringArrayBuilder *builder, const gchar *value, GError **error) +{ + return garrow_string_array_builder_append_value(builder, value, error); +} + +/** + * garrow_string_array_builder_append_value: + * @builder: A #GArrowStringArrayBuilder. + * @value: A string value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_string_array_builder_append_value(GArrowStringArrayBuilder *builder, + const gchar *value, + GError **error) { auto arrow_builder = static_cast<arrow::StringBuilder *>( @@ -2209,7 +2528,9 @@ garrow_string_array_builder_append(GArrowStringArrayBuilder *builder, auto status = arrow_builder->Append(value, static_cast<gint32>(strlen(value))); - return garrow_error_check(error, status, "[string-array-builder][append]"); + return garrow_error_check(error, + status, + "[string-array-builder][append-value]"); } /** @@ -2290,17 +2611,38 @@ garrow_date32_array_builder_new(void) * Returns: %TRUE on success, %FALSE if there was an error. * * Since: 0.7.0 + * + * Deprecated: 0.12.0: + * Use garrow_date32_array_builder_append_value() instead. */ gboolean garrow_date32_array_builder_append(GArrowDate32ArrayBuilder *builder, gint32 value, GError **error) { - return garrow_array_builder_append + return garrow_date32_array_builder_append_value(builder, value, error); +} + +/** + * garrow_date32_array_builder_append_value: + * @builder: A #GArrowDate32ArrayBuilder. + * @value: The number of days since UNIX epoch in signed 32bit integer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_date32_array_builder_append_value(GArrowDate32ArrayBuilder *builder, + gint32 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[date32-array-builder][append]"); + "[date32-array-builder][append-value]"); } /** @@ -2425,17 +2767,38 @@ garrow_date64_array_builder_new(void) * Returns: %TRUE on success, %FALSE if there was an error. * * Since: 0.7.0 + * + * Deprecated: 0.12.0: + * Use garrow_date64_array_builder_append_value() instead. */ gboolean garrow_date64_array_builder_append(GArrowDate64ArrayBuilder *builder, gint64 value, GError **error) { - return garrow_array_builder_append + return garrow_date64_array_builder_append_value(builder, value, error); +} + +/** + * garrow_date64_array_builder_append_value: + * @builder: A #GArrowDate64ArrayBuilder. + * @value: The number of milliseconds since UNIX epoch in signed 64bit integer. + * @error: (nullable): Return location for a #GError or %NULL.
+ * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_date64_array_builder_append_value(GArrowDate64ArrayBuilder *builder, + gint64 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[date64-array-builder][append]"); + "[date64-array-builder][append-value]"); } /** @@ -2562,17 +2925,38 @@ garrow_timestamp_array_builder_new(GArrowTimestampDataType *data_type) * Returns: %TRUE on success, %FALSE if there was an error. * * Since: 0.7.0 + * + * Deprecated: 0.12.0: + * Use garrow_timestamp_array_builder_append_value() instead. */ gboolean garrow_timestamp_array_builder_append(GArrowTimestampArrayBuilder *builder, gint64 value, GError **error) { - return garrow_array_builder_append + return garrow_timestamp_array_builder_append_value(builder, value, error); +} + +/** + * garrow_timestamp_array_builder_append_value: + * @builder: A #GArrowTimestampArrayBuilder. + * @value: The number of seconds, milliseconds, microseconds or nanoseconds since UNIX epoch, depending on the unit of the builder's data type, in signed 64bit integer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_timestamp_array_builder_append_value(GArrowTimestampArrayBuilder *builder, + gint64 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[timestamp-array-builder][append]"); + "[timestamp-array-builder][append-value]"); } /** @@ -2699,17 +3083,38 @@ garrow_time32_array_builder_new(GArrowTime32DataType *data_type) * Returns: %TRUE on success, %FALSE if there was an error. * * Since: 0.7.0 + * + * Deprecated: 0.12.0: + * Use garrow_time32_array_builder_append_value() instead. */ gboolean garrow_time32_array_builder_append(GArrowTime32ArrayBuilder *builder, gint32 value, GError **error) { - return garrow_array_builder_append + return garrow_time32_array_builder_append_value(builder, value, error); +} + +/** + * garrow_time32_array_builder_append_value: + * @builder: A #GArrowTime32ArrayBuilder. + * @value: The number of seconds or milliseconds since midnight, depending on the unit of the builder's data type, in signed 32bit integer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_time32_array_builder_append_value(GArrowTime32ArrayBuilder *builder, + gint32 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[time32-array-builder][append]"); + "[time32-array-builder][append-value]"); } /** @@ -2836,17 +3241,38 @@ garrow_time64_array_builder_new(GArrowTime64DataType *data_type) * Returns: %TRUE on success, %FALSE if there was an error. * * Since: 0.7.0 + * + * Deprecated: 0.12.0: + * Use garrow_time64_array_builder_append_value() instead. */ gboolean garrow_time64_array_builder_append(GArrowTime64ArrayBuilder *builder, gint64 value, GError **error) { - return garrow_array_builder_append + return garrow_time64_array_builder_append_value(builder, value, error); +} + +/** + * garrow_time64_array_builder_append_value: + * @builder: A #GArrowTime64ArrayBuilder. + * @value: The number of microseconds or nanoseconds since midnight, depending on the unit of the builder's data type, in signed 64bit integer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error.
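+ *
+ * An illustrative sketch (editor's example, not part of this patch;
+ * it assumes a builder created with a microsecond #GArrowTime64DataType):
+ * |[
+ * // 01:30:00 as the number of microseconds since midnight.
+ * garrow_time64_array_builder_append_value(builder,
+ *                                          G_GINT64_CONSTANT(5400000000),
+ *                                          NULL);
+ * ]|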
+ * + * Since: 0.12.0 + */ +gboolean +garrow_time64_array_builder_append_value(GArrowTime64ArrayBuilder *builder, + gint64 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[time64-array-builder][append]"); + "[time64-array-builder][append-value]"); } /** @@ -3047,17 +3473,72 @@ garrow_list_array_builder_new(GArrowListDataType *data_type, * g_object_unref(array); * } * ]| + * + * Deprecated: 0.12.0: + * Use garrow_list_array_builder_append_value() instead. */ gboolean garrow_list_array_builder_append(GArrowListArrayBuilder *builder, GError **error) +{ + return garrow_list_array_builder_append_value(builder, error); +} + +/** + * garrow_list_array_builder_append_value: + * @builder: A #GArrowListArrayBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * It appends a new list element. To append a new list element, you + * need to call this function and then append list element values to + * `value_builder`. `value_builder` is the #GArrowArrayBuilder + * specified to the constructor. You can get `value_builder` by + * garrow_list_array_builder_get_value_builder(). + * + * |[ + * GArrowInt8ArrayBuilder *value_builder; + * GArrowListArrayBuilder *builder; + * + * value_builder = garrow_int8_array_builder_new(); + * builder = garrow_list_array_builder_new(value_builder, NULL); + * + * // Start 0th list element: [1, 0, -1] + * garrow_list_array_builder_append_value(builder, NULL); + * garrow_int8_array_builder_append_value(value_builder, 1, NULL); + * garrow_int8_array_builder_append_value(value_builder, 0, NULL); + * garrow_int8_array_builder_append_value(value_builder, -1, NULL); + * + * // Start 1st list element: [-29, 29] + * garrow_list_array_builder_append_value(builder, NULL); + * garrow_int8_array_builder_append_value(value_builder, -29, NULL); + * garrow_int8_array_builder_append_value(value_builder, 29, NULL); + * + * { + * // [[1, 0, -1], [-29, 29]] + * GArrowArray *array = garrow_array_builder_finish(builder); + * // Now, the builder is no longer needed. + * g_object_unref(builder); + * g_object_unref(value_builder); + * + * // Use array... + * g_object_unref(array); + * } + * ]| + * + * Since: 0.12.0 + */ +gboolean +garrow_list_array_builder_append_value(GArrowListArrayBuilder *builder, + GError **error) { auto arrow_builder = static_cast<arrow::ListBuilder *>( garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); auto status = arrow_builder->Append(); - return garrow_error_check(error, status, "[list-array-builder][append]"); + return garrow_error_check(error, status, "[list-array-builder][append-value]"); } /** @@ -3195,17 +3676,49 @@ garrow_struct_array_builder_new(GArrowStructDataType *data_type, * |[ * // TODO * ]| + * + * Deprecated: 0.12.0: + * Use garrow_struct_array_builder_append_value() instead. */ gboolean garrow_struct_array_builder_append(GArrowStructArrayBuilder *builder, GError **error) +{ + return garrow_struct_array_builder_append_value(builder, error); +} + +/** + * garrow_struct_array_builder_append_value: + * @builder: A #GArrowStructArrayBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * It appends a new struct element. To append a new struct element, + * you need to call this function and then append struct element field + * values to all `field_builder`s. The `field_builder`s are the + * #GArrowArrayBuilder objects specified to the constructor.
You can get + * `field_builder` by garrow_struct_array_builder_get_field_builder() + * or garrow_struct_array_builder_get_field_builders(). + * + * |[ + * // TODO + * ]| + * + * Since: 0.12.0 + */ +gboolean +garrow_struct_array_builder_append_value(GArrowStructArrayBuilder *builder, + GError **error) { auto arrow_builder = static_cast<arrow::StructBuilder *>( garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); auto status = arrow_builder->Append(); - return garrow_error_check(error, status, "[struct-array-builder][append]"); + return garrow_error_check(error, + status, + "[struct-array-builder][append-value]"); } /** @@ -3290,14 +3803,14 @@ garrow_decimal128_array_builder_class_init(GArrowDecimal128ArrayBuilderClass *kl /** * garrow_decimal128_array_builder_new: - * @data_type: #GArrowDecimalDataType for the decimal. + * @data_type: #GArrowDecimal128DataType for the decimal. * * Returns: A newly created #GArrowDecimal128ArrayBuilder. * * Since: 0.10.0 */ GArrowDecimal128ArrayBuilder * -garrow_decimal128_array_builder_new(GArrowDecimalDataType *data_type) +garrow_decimal128_array_builder_new(GArrowDecimal128DataType *data_type) { auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); auto builder = garrow_array_builder_new(arrow_data_type, @@ -3315,18 +3828,60 @@ garrow_decimal128_array_builder_new(GArrowDecimalDataType *data_type) * Returns: %TRUE on success, %FALSE if there was an error. * * Since: 0.10.0 + * + * Deprecated: 0.12.0: + * Use garrow_decimal128_array_builder_append_value() instead. */ gboolean garrow_decimal128_array_builder_append(GArrowDecimal128ArrayBuilder *builder, GArrowDecimal128 *value, GError **error) +{ + return garrow_decimal128_array_builder_append_value(builder, value, error); +} + +/** + * garrow_decimal128_array_builder_append_value: + * @builder: A #GArrowDecimal128ArrayBuilder. + * @value: A decimal value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_decimal128_array_builder_append_value(GArrowDecimal128ArrayBuilder *builder, + GArrowDecimal128 *value, + GError **error) { auto arrow_decimal = garrow_decimal128_get_raw(value); - return garrow_array_builder_append + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), *arrow_decimal, error, - "[decimal128-array-builder][append]"); + "[decimal128-array-builder][append-value]"); +} + +/** + * garrow_decimal128_array_builder_append_null: + * @builder: A #GArrowDecimal128ArrayBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * It appends a new NULL element.
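+ *
+ * An illustrative sketch (editor's example, not part of this patch;
+ * `builder` is assumed to be a #GArrowDecimal128ArrayBuilder created
+ * elsewhere):
+ * |[
+ * // Builds [138.23, NULL].
+ * GArrowDecimal128 *value = garrow_decimal128_new_string("138.23");
+ * garrow_decimal128_array_builder_append_value(builder, value, NULL);
+ * garrow_decimal128_array_builder_append_null(builder, NULL);
+ * g_object_unref(value);
+ * ]|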
+ * + * Since: 0.12.0 + */ +gboolean +garrow_decimal128_array_builder_append_null(GArrowDecimal128ArrayBuilder *builder, + GError **error) +{ + return garrow_array_builder_append_null + (GARROW_ARRAY_BUILDER(builder), + error, + "[decimal128-array-builder][append-null]"); } G_END_DECLS diff --git a/c_glib/arrow-glib/array-builder.h b/c_glib/arrow-glib/array-builder.h index db340b70ab77c..bc0a99429b8f1 100644 --- a/c_glib/arrow-glib/array-builder.h +++ b/c_glib/arrow-glib/array-builder.h @@ -20,8 +20,7 @@ #pragma once #include -#include -#include +#include G_BEGIN_DECLS @@ -90,9 +89,16 @@ GType garrow_boolean_array_builder_get_type(void) G_GNUC_CONST; GArrowBooleanArrayBuilder *garrow_boolean_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_boolean_array_builder_append_value) gboolean garrow_boolean_array_builder_append(GArrowBooleanArrayBuilder *builder, gboolean value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_boolean_array_builder_append_value(GArrowBooleanArrayBuilder *builder, + gboolean value, + GError **error); gboolean garrow_boolean_array_builder_append_values(GArrowBooleanArrayBuilder *builder, const gboolean *values, gint64 values_length, @@ -150,9 +156,16 @@ GType garrow_int_array_builder_get_type(void) G_GNUC_CONST; GArrowIntArrayBuilder *garrow_int_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_int_array_builder_append_value) gboolean garrow_int_array_builder_append(GArrowIntArrayBuilder *builder, gint64 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_int_array_builder_append_value(GArrowIntArrayBuilder *builder, + gint64 value, + GError **error); gboolean garrow_int_array_builder_append_values(GArrowIntArrayBuilder *builder, const gint64 *values, gint64 values_length, @@ -179,9 +192,16 @@ struct _GArrowUIntArrayBuilderClass GArrowUIntArrayBuilder *garrow_uint_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint_array_builder_append_value) gboolean garrow_uint_array_builder_append(GArrowUIntArrayBuilder *builder, guint64 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_uint_array_builder_append_value(GArrowUIntArrayBuilder *builder, + guint64 value, + GError **error); gboolean garrow_uint_array_builder_append_values(GArrowUIntArrayBuilder *builder, const guint64 *values, gint64 values_length, @@ -239,9 +259,16 @@ GType garrow_int8_array_builder_get_type(void) G_GNUC_CONST; GArrowInt8ArrayBuilder *garrow_int8_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_int8_array_builder_append_value) gboolean garrow_int8_array_builder_append(GArrowInt8ArrayBuilder *builder, gint8 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_int8_array_builder_append_value(GArrowInt8ArrayBuilder *builder, + gint8 value, + GError **error); gboolean garrow_int8_array_builder_append_values(GArrowInt8ArrayBuilder *builder, const gint8 *values, gint64 values_length, @@ -299,9 +326,16 @@ GType garrow_uint8_array_builder_get_type(void) G_GNUC_CONST; GArrowUInt8ArrayBuilder *garrow_uint8_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint8_array_builder_append_value) gboolean garrow_uint8_array_builder_append(GArrowUInt8ArrayBuilder *builder, guint8 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean 
garrow_uint8_array_builder_append_value(GArrowUInt8ArrayBuilder *builder, + guint8 value, + GError **error); gboolean garrow_uint8_array_builder_append_values(GArrowUInt8ArrayBuilder *builder, const guint8 *values, gint64 values_length, @@ -359,9 +393,16 @@ GType garrow_int16_array_builder_get_type(void) G_GNUC_CONST; GArrowInt16ArrayBuilder *garrow_int16_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_int16_array_builder_append_value) gboolean garrow_int16_array_builder_append(GArrowInt16ArrayBuilder *builder, gint16 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_int16_array_builder_append_value(GArrowInt16ArrayBuilder *builder, + gint16 value, + GError **error); gboolean garrow_int16_array_builder_append_values(GArrowInt16ArrayBuilder *builder, const gint16 *values, gint64 values_length, @@ -419,9 +460,16 @@ GType garrow_uint16_array_builder_get_type(void) G_GNUC_CONST; GArrowUInt16ArrayBuilder *garrow_uint16_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint16_array_builder_append_value) gboolean garrow_uint16_array_builder_append(GArrowUInt16ArrayBuilder *builder, guint16 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_uint16_array_builder_append_value(GArrowUInt16ArrayBuilder *builder, + guint16 value, + GError **error); gboolean garrow_uint16_array_builder_append_values(GArrowUInt16ArrayBuilder *builder, const guint16 *values, gint64 values_length, @@ -479,9 +527,16 @@ GType garrow_int32_array_builder_get_type(void) G_GNUC_CONST; GArrowInt32ArrayBuilder *garrow_int32_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_int32_array_builder_append_value) gboolean garrow_int32_array_builder_append(GArrowInt32ArrayBuilder *builder, gint32 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_int32_array_builder_append_value(GArrowInt32ArrayBuilder *builder, + gint32 value, + GError **error); gboolean garrow_int32_array_builder_append_values(GArrowInt32ArrayBuilder *builder, const gint32 *values, gint64 values_length, @@ -539,9 +594,16 @@ GType garrow_uint32_array_builder_get_type(void) G_GNUC_CONST; GArrowUInt32ArrayBuilder *garrow_uint32_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint32_array_builder_append_value) gboolean garrow_uint32_array_builder_append(GArrowUInt32ArrayBuilder *builder, guint32 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_uint32_array_builder_append_value(GArrowUInt32ArrayBuilder *builder, + guint32 value, + GError **error); gboolean garrow_uint32_array_builder_append_values(GArrowUInt32ArrayBuilder *builder, const guint32 *values, gint64 values_length, @@ -599,9 +661,16 @@ GType garrow_int64_array_builder_get_type(void) G_GNUC_CONST; GArrowInt64ArrayBuilder *garrow_int64_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_int64_array_builder_append_value) gboolean garrow_int64_array_builder_append(GArrowInt64ArrayBuilder *builder, gint64 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_int64_array_builder_append_value(GArrowInt64ArrayBuilder *builder, + gint64 value, + GError **error); gboolean garrow_int64_array_builder_append_values(GArrowInt64ArrayBuilder *builder, const gint64 *values, gint64 values_length, @@ -659,9 +728,16 @@ GType garrow_uint64_array_builder_get_type(void) 
G_GNUC_CONST; GArrowUInt64ArrayBuilder *garrow_uint64_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint64_array_builder_append_value) gboolean garrow_uint64_array_builder_append(GArrowUInt64ArrayBuilder *builder, guint64 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_uint64_array_builder_append_value(GArrowUInt64ArrayBuilder *builder, + guint64 value, + GError **error); gboolean garrow_uint64_array_builder_append_values(GArrowUInt64ArrayBuilder *builder, const guint64 *values, gint64 values_length, @@ -719,9 +795,16 @@ GType garrow_float_array_builder_get_type(void) G_GNUC_CONST; GArrowFloatArrayBuilder *garrow_float_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_float_array_builder_append_value) gboolean garrow_float_array_builder_append(GArrowFloatArrayBuilder *builder, gfloat value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_float_array_builder_append_value(GArrowFloatArrayBuilder *builder, + gfloat value, + GError **error); gboolean garrow_float_array_builder_append_values(GArrowFloatArrayBuilder *builder, const gfloat *values, gint64 values_length, @@ -779,9 +862,16 @@ GType garrow_double_array_builder_get_type(void) G_GNUC_CONST; GArrowDoubleArrayBuilder *garrow_double_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_double_array_builder_append_value) gboolean garrow_double_array_builder_append(GArrowDoubleArrayBuilder *builder, gdouble value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_double_array_builder_append_value(GArrowDoubleArrayBuilder *builder, + gdouble value, + GError **error); gboolean garrow_double_array_builder_append_values(GArrowDoubleArrayBuilder *builder, const gdouble *values, gint64 values_length, @@ -839,10 +929,18 @@ GType garrow_binary_array_builder_get_type(void) G_GNUC_CONST; GArrowBinaryArrayBuilder *garrow_binary_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_binary_array_builder_append_value) gboolean garrow_binary_array_builder_append(GArrowBinaryArrayBuilder *builder, const guint8 *value, gint32 length, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_binary_array_builder_append_value(GArrowBinaryArrayBuilder *builder, + const guint8 *value, + gint32 length, + GError **error); gboolean garrow_binary_array_builder_append_null(GArrowBinaryArrayBuilder *builder, GError **error); @@ -891,9 +989,16 @@ GType garrow_string_array_builder_get_type(void) G_GNUC_CONST; GArrowStringArrayBuilder *garrow_string_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_string_array_builder_append_value) gboolean garrow_string_array_builder_append(GArrowStringArrayBuilder *builder, const gchar *value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_string_array_builder_append_value(GArrowStringArrayBuilder *builder, + const gchar *value, + GError **error); gboolean garrow_string_array_builder_append_values(GArrowStringArrayBuilder *builder, const gchar **values, gint64 values_length, @@ -946,9 +1051,16 @@ GType garrow_date32_array_builder_get_type(void) G_GNUC_CONST; GArrowDate32ArrayBuilder *garrow_date32_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_date32_array_builder_append_value) gboolean garrow_date32_array_builder_append(GArrowDate32ArrayBuilder *builder, 
gint32 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_date32_array_builder_append_value(GArrowDate32ArrayBuilder *builder, + gint32 value, + GError **error); gboolean garrow_date32_array_builder_append_values(GArrowDate32ArrayBuilder *builder, const gint32 *values, gint64 values_length, @@ -1006,9 +1118,16 @@ GType garrow_date64_array_builder_get_type(void) G_GNUC_CONST; GArrowDate64ArrayBuilder *garrow_date64_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_date64_array_builder_append_value) gboolean garrow_date64_array_builder_append(GArrowDate64ArrayBuilder *builder, gint64 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_date64_array_builder_append_value(GArrowDate64ArrayBuilder *builder, + gint64 value, + GError **error); gboolean garrow_date64_array_builder_append_values(GArrowDate64ArrayBuilder *builder, const gint64 *values, gint64 values_length, @@ -1067,9 +1186,16 @@ GType garrow_timestamp_array_builder_get_type(void) G_GNUC_CONST; GArrowTimestampArrayBuilder * garrow_timestamp_array_builder_new(GArrowTimestampDataType *data_type); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_timestamp_array_builder_append_value) gboolean garrow_timestamp_array_builder_append(GArrowTimestampArrayBuilder *builder, gint64 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_timestamp_array_builder_append_value(GArrowTimestampArrayBuilder *builder, + gint64 value, + GError **error); gboolean garrow_timestamp_array_builder_append_values(GArrowTimestampArrayBuilder *builder, const gint64 *values, gint64 values_length, @@ -1127,9 +1253,16 @@ GType garrow_time32_array_builder_get_type(void) G_GNUC_CONST; GArrowTime32ArrayBuilder *garrow_time32_array_builder_new(GArrowTime32DataType *data_type); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_time32_array_builder_append_value) gboolean garrow_time32_array_builder_append(GArrowTime32ArrayBuilder *builder, gint32 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_time32_array_builder_append_value(GArrowTime32ArrayBuilder *builder, + gint32 value, + GError **error); gboolean garrow_time32_array_builder_append_values(GArrowTime32ArrayBuilder *builder, const gint32 *values, gint64 values_length, @@ -1187,9 +1320,16 @@ GType garrow_time64_array_builder_get_type(void) G_GNUC_CONST; GArrowTime64ArrayBuilder *garrow_time64_array_builder_new(GArrowTime64DataType *data_type); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_time64_array_builder_append_value) gboolean garrow_time64_array_builder_append(GArrowTime64ArrayBuilder *builder, gint64 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_time64_array_builder_append_value(GArrowTime64ArrayBuilder *builder, + gint64 value, + GError **error); gboolean garrow_time64_array_builder_append_values(GArrowTime64ArrayBuilder *builder, const gint64 *values, gint64 values_length, @@ -1248,8 +1388,14 @@ GType garrow_list_array_builder_get_type(void) G_GNUC_CONST; GArrowListArrayBuilder *garrow_list_array_builder_new(GArrowListDataType *data_type, GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_list_array_builder_append_value) gboolean garrow_list_array_builder_append(GArrowListArrayBuilder *builder, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_list_array_builder_append_value(GArrowListArrayBuilder *builder, + 
GError **error); gboolean garrow_list_array_builder_append_null(GArrowListArrayBuilder *builder, GError **error); @@ -1301,8 +1447,14 @@ GType garrow_struct_array_builder_get_type(void) G_GNUC_CONST; GArrowStructArrayBuilder *garrow_struct_array_builder_new(GArrowStructDataType *data_type, GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_struct_array_builder_append_value) gboolean garrow_struct_array_builder_append(GArrowStructArrayBuilder *builder, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_struct_array_builder_append_value(GArrowStructArrayBuilder *builder, + GError **error); gboolean garrow_struct_array_builder_append_null(GArrowStructArrayBuilder *builder, GError **error); @@ -1322,10 +1474,20 @@ struct _GArrowDecimal128ArrayBuilderClass { GArrowArrayBuilderClass parent_class; }; -GArrowDecimal128ArrayBuilder *garrow_decimal128_array_builder_new(GArrowDecimalDataType *data_type); +GArrowDecimal128ArrayBuilder *garrow_decimal128_array_builder_new(GArrowDecimal128DataType *data_type); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_decimal128_array_builder_append_value) gboolean garrow_decimal128_array_builder_append(GArrowDecimal128ArrayBuilder *builder, GArrowDecimal128 *value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_decimal128_array_builder_append_value(GArrowDecimal128ArrayBuilder *builder, + GArrowDecimal128 *value, + GError **error); +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_decimal128_array_builder_append_null(GArrowDecimal128ArrayBuilder *builder, + GError **error); G_END_DECLS diff --git a/c_glib/arrow-glib/basic-array.cpp b/c_glib/arrow-glib/basic-array.cpp index 77f64fc0a89fc..9aebd9cb8957a 100644 --- a/c_glib/arrow-glib/basic-array.cpp +++ b/c_glib/arrow-glib/basic-array.cpp @@ -22,12 +22,12 @@ #endif #include +#include #include #include -#include +#include #include #include -#include #include @@ -209,7 +209,9 @@ enum { PROP_ARRAY }; -G_DEFINE_TYPE_WITH_PRIVATE(GArrowArray, garrow_array, G_TYPE_OBJECT) +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GArrowArray, + garrow_array, + G_TYPE_OBJECT) #define GARROW_ARRAY_GET_PRIVATE(obj) \ static_cast<GArrowArrayPrivate *>( \ @@ -494,7 +496,8 @@ garrow_array_slice(GArrowArray *array, * @array: A #GArrowArray. * @error: (nullable): Return location for a #GError or %NULL. * - * Returns: (nullable): The formatted array content or %NULL on error. + * Returns: (nullable) (transfer full): + * The formatted array content or %NULL on error. * * The returned string should be freed when with g_free() when no * longer needed. @@ -764,7 +767,8 @@ garrow_boolean_array_get_value(GArrowBooleanArray *array, * @array: A #GArrowBooleanArray. * @length: (out): The number of values. * - * Returns: (array length=length): The raw boolean values. + * Returns: (array length=length) (transfer full): + * The raw boolean values. * * It should be freed with g_free() when no longer needed. */ @@ -2144,10 +2148,10 @@ garrow_decimal128_array_class_init(GArrowDecimal128ArrayClass *klass) * @array: A #GArrowDecimal128Array. * @i: The index of the target value. * - * Returns: The formatted i-th value. + * Returns: (transfer full): The formatted i-th value. * - * The returned string should be freed with g_free() when no longer - * needed. + * The returned string should be freed with g_free() when no longer + * needed.
* * Since: 0.10.0 */ @@ -2255,6 +2259,17 @@ garrow_array_new_raw(std::shared_ptr<arrow::Array> *arrow_array) case arrow::Type::type::STRUCT: type = GARROW_TYPE_STRUCT_ARRAY; break; + case arrow::Type::type::UNION: + { + auto arrow_union_array = + std::static_pointer_cast<arrow::UnionArray>(*arrow_array); + if (arrow_union_array->mode() == arrow::UnionMode::SPARSE) { + type = GARROW_TYPE_SPARSE_UNION_ARRAY; + } else { + type = GARROW_TYPE_DENSE_UNION_ARRAY; + } + } + break; case arrow::Type::type::DICTIONARY: type = GARROW_TYPE_DICTIONARY_ARRAY; break; diff --git a/c_glib/arrow-glib/basic-data-type.cpp b/c_glib/arrow-glib/basic-data-type.cpp index 24133c99f46de..b6c5705fb070b 100644 --- a/c_glib/arrow-glib/basic-data-type.cpp +++ b/c_glib/arrow-glib/basic-data-type.cpp @@ -66,6 +66,8 @@ G_BEGIN_DECLS * * #GArrowBinaryDataType is a class for binary data type. * + * #GArrowFixedSizeBinaryDataType is a class for fixed-size binary data type. + * * #GArrowStringDataType is a class for UTF-8 encoded string data * type. * @@ -85,7 +87,9 @@ G_BEGIN_DECLS * #GArrowTime64DataType is a class for the number of microseconds or * nanoseconds since midnight in 64-bit signed integer data type. * - * #GArrowDecimalDataType is a class for 128-bit decimal data type. + * #GArrowDecimalDataType is a base class for decimal data type. + * + * #GArrowDecimal128DataType is a class for 128-bit decimal data type. */ typedef struct GArrowDataTypePrivate_ { @@ -198,8 +202,8 @@ garrow_data_type_equal(GArrowDataType *data_type, * garrow_data_type_to_string: * @data_type: A #GArrowDataType. * - * Returns: The string representation of the data type. The caller - * must free it by g_free() when the caller doesn't need it anymore. + * Returns: (transfer full): The string representation of the data type. + * The caller must free it by g_free() when the caller doesn't need it anymore. */ gchar * garrow_data_type_to_string(GArrowDataType *data_type) @@ -237,7 +241,7 @@ garrow_fixed_width_data_type_class_init(GArrowFixedWidthDataTypeClass *klass) } /** - * garrow_fixed_width_data_type_get_id: + * garrow_fixed_width_data_type_get_bit_width: + * @data_type: A #GArrowFixedWidthDataType. * * Returns: The number of bits for one data. @@ -714,6 +718,59 @@ garrow_binary_data_type_new(void) } +G_DEFINE_TYPE(GArrowFixedSizeBinaryDataType, + garrow_fixed_size_binary_data_type, + GARROW_TYPE_FIXED_WIDTH_DATA_TYPE) + +static void +garrow_fixed_size_binary_data_type_init(GArrowFixedSizeBinaryDataType *object) +{ +} + +static void +garrow_fixed_size_binary_data_type_class_init(GArrowFixedSizeBinaryDataTypeClass *klass) +{ +} + +/** + * garrow_fixed_size_binary_data_type_new: + * @byte_width: The byte width. + * + * Returns: The newly created fixed-size binary data type. + * + * Since: 0.12.0 + */ +GArrowFixedSizeBinaryDataType * +garrow_fixed_size_binary_data_type_new(gint32 byte_width) +{ + auto arrow_fixed_size_binary_data_type = arrow::fixed_size_binary(byte_width); + + auto fixed_size_binary_data_type = + GARROW_FIXED_SIZE_BINARY_DATA_TYPE(g_object_new(GARROW_TYPE_FIXED_SIZE_BINARY_DATA_TYPE, + "data-type", &arrow_fixed_size_binary_data_type, + NULL)); + return fixed_size_binary_data_type; +} + +/** + * garrow_fixed_size_binary_data_type_get_byte_width: + * @data_type: A #GArrowFixedSizeBinaryDataType. + * + * Returns: The number of bytes for one data.
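+ *
+ * An illustrative sketch (editor's example, not part of this patch):
+ * |[
+ * // A type for 16-byte values such as UUIDs.
+ * GArrowFixedSizeBinaryDataType *data_type =
+ *   garrow_fixed_size_binary_data_type_new(16);
+ * g_print("byte width: %d\n",
+ *         garrow_fixed_size_binary_data_type_get_byte_width(data_type));
+ * g_object_unref(data_type);
+ * ]|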
+ * + * Since: 0.12.0 + */ +gint32 +garrow_fixed_size_binary_data_type_get_byte_width(GArrowFixedSizeBinaryDataType *data_type) +{ + const auto arrow_data_type = + garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + const auto arrow_fixed_size_binary_type = + std::static_pointer_cast<arrow::FixedSizeBinaryType>(arrow_data_type); + return arrow_fixed_size_binary_type->byte_width(); +} + + G_DEFINE_TYPE(GArrowStringDataType, garrow_string_data_type, GARROW_TYPE_DATA_TYPE) @@ -1040,9 +1097,9 @@ garrow_time64_data_type_new(GArrowTimeUnit unit, GError **error) } -G_DEFINE_TYPE(GArrowDecimalDataType, - garrow_decimal_data_type, - GARROW_TYPE_DATA_TYPE) +G_DEFINE_ABSTRACT_TYPE(GArrowDecimalDataType, + garrow_decimal_data_type, + GARROW_TYPE_FIXED_SIZE_BINARY_DATA_TYPE) static void garrow_decimal_data_type_init(GArrowDecimalDataType *object) @@ -1062,18 +1119,16 @@ garrow_decimal_data_type_class_init(GArrowDecimalDataTypeClass *klass) * Returns: The newly created decimal data type. * * Since: 0.10.0 + * + * Deprecated: 0.12.0: + * Use garrow_decimal128_data_type_new() instead. */ GArrowDecimalDataType * garrow_decimal_data_type_new(gint32 precision, gint32 scale) { - auto arrow_data_type = arrow::decimal(precision, scale); - - GArrowDecimalDataType *data_type = - GARROW_DECIMAL_DATA_TYPE(g_object_new(GARROW_TYPE_DECIMAL_DATA_TYPE, - "data-type", &arrow_data_type, - NULL)); - return data_type; + auto decimal128_data_type = garrow_decimal128_data_type_new(precision, scale); + return GARROW_DECIMAL_DATA_TYPE(decimal128_data_type); } /** @@ -1112,6 +1167,43 @@ garrow_decimal_data_type_get_scale(GArrowDecimalDataType *decimal_data_type) return arrow_decimal_type->scale(); } + +G_DEFINE_TYPE(GArrowDecimal128DataType, + garrow_decimal128_data_type, + GARROW_TYPE_DECIMAL_DATA_TYPE) + +static void +garrow_decimal128_data_type_init(GArrowDecimal128DataType *object) +{ +} + +static void +garrow_decimal128_data_type_class_init(GArrowDecimal128DataTypeClass *klass) +{ +} + +/** + * garrow_decimal128_data_type_new: + * @precision: The precision of decimal data. + * @scale: The scale of decimal data. + * + * Returns: The newly created 128-bit decimal data type.
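+ *
+ * An illustrative sketch (editor's example, not part of this patch):
+ * |[
+ * // 8 significant digits, 2 of them after the decimal point,
+ * // e.g. 123456.78.
+ * GArrowDecimal128DataType *data_type =
+ *   garrow_decimal128_data_type_new(8, 2);
+ * ]|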
+ * + * Since: 0.12.0 + */ +GArrowDecimal128DataType * +garrow_decimal128_data_type_new(gint32 precision, + gint32 scale) +{ + auto arrow_data_type = arrow::decimal(precision, scale); + + auto data_type = + GARROW_DECIMAL128_DATA_TYPE(g_object_new(GARROW_TYPE_DECIMAL128_DATA_TYPE, + "data-type", &arrow_data_type, + NULL)); + return data_type; +} + G_END_DECLS GArrowDataType * @@ -1160,6 +1252,9 @@ garrow_data_type_new_raw(std::shared_ptr<arrow::DataType> *arrow_data_type) case arrow::Type::type::BINARY: type = GARROW_TYPE_BINARY_DATA_TYPE; break; + case arrow::Type::type::FIXED_SIZE_BINARY: + type = GARROW_TYPE_FIXED_SIZE_BINARY_DATA_TYPE; + break; case arrow::Type::type::STRING: type = GARROW_TYPE_STRING_DATA_TYPE; break; @@ -1184,11 +1279,22 @@ garrow_data_type_new_raw(std::shared_ptr<arrow::DataType> *arrow_data_type) case arrow::Type::type::STRUCT: type = GARROW_TYPE_STRUCT_DATA_TYPE; break; + case arrow::Type::type::UNION: + { + auto arrow_union_data_type = + std::static_pointer_cast<arrow::UnionType>(*arrow_data_type); + if (arrow_union_data_type->mode() == arrow::UnionMode::SPARSE) { + type = GARROW_TYPE_SPARSE_UNION_DATA_TYPE; + } else { + type = GARROW_TYPE_DENSE_UNION_DATA_TYPE; + } + } + break; case arrow::Type::type::DICTIONARY: type = GARROW_TYPE_DICTIONARY_DATA_TYPE; break; case arrow::Type::type::DECIMAL: - type = GARROW_TYPE_DECIMAL_DATA_TYPE; + type = GARROW_TYPE_DECIMAL128_DATA_TYPE; break; default: type = GARROW_TYPE_DATA_TYPE; diff --git a/c_glib/arrow-glib/basic-data-type.h b/c_glib/arrow-glib/basic-data-type.h index 45fddba34d4bc..d18958265748d 100644 --- a/c_glib/arrow-glib/basic-data-type.h +++ b/c_glib/arrow-glib/basic-data-type.h @@ -19,9 +19,9 @@ #pragma once -#include +#include #include -#include +#include G_BEGIN_DECLS @@ -338,6 +338,25 @@ GType garrow_binary_data_type_get_type (void) G_GNUC_CONST; GArrowBinaryDataType *garrow_binary_data_type_new (void); +#define GARROW_TYPE_FIXED_SIZE_BINARY_DATA_TYPE (garrow_fixed_size_binary_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowFixedSizeBinaryDataType, + garrow_fixed_size_binary_data_type, + GARROW, + FIXED_SIZE_BINARY_DATA_TYPE, + GArrowFixedWidthDataType) +struct _GArrowFixedSizeBinaryDataTypeClass +{ + GArrowFixedWidthDataTypeClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_12 +GArrowFixedSizeBinaryDataType * +garrow_fixed_size_binary_data_type_new(gint32 byte_width); +GARROW_AVAILABLE_IN_0_12 +gint32 +garrow_fixed_size_binary_data_type_get_byte_width(GArrowFixedSizeBinaryDataType *data_type); + + #define GARROW_TYPE_STRING_DATA_TYPE \ (garrow_string_data_type_get_type()) #define GARROW_STRING_DATA_TYPE(obj) \ @@ -655,15 +674,34 @@ G_DECLARE_DERIVABLE_TYPE(GArrowDecimalDataType, garrow_decimal_data_type, GARROW, DECIMAL_DATA_TYPE, - GArrowDataType) + GArrowFixedSizeBinaryDataType) struct _GArrowDecimalDataTypeClass { - GArrowDataTypeClass parent_class; + GArrowFixedSizeBinaryDataTypeClass parent_class; }; -GArrowDecimalDataType *garrow_decimal_data_type_new (gint32 precision, - gint32 scale); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_decimal128_data_type_new) +GArrowDecimalDataType * +garrow_decimal_data_type_new(gint32 precision, gint32 scale); +#endif gint32 garrow_decimal_data_type_get_precision(GArrowDecimalDataType *decimal_data_type); gint32 garrow_decimal_data_type_get_scale(GArrowDecimalDataType *decimal_data_type); + +#define GARROW_TYPE_DECIMAL128_DATA_TYPE (garrow_decimal128_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128DataType, + garrow_decimal128_data_type, + GARROW, + DECIMAL128_DATA_TYPE,
GArrowDecimalDataType) +struct _GArrowDecimal128DataTypeClass +{ + GArrowDecimalDataTypeClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_12 +GArrowDecimal128DataType * +garrow_decimal128_data_type_new(gint32 precision, gint32 scale); + G_END_DECLS diff --git a/c_glib/arrow-glib/chunked-array.cpp b/c_glib/arrow-glib/chunked-array.cpp index e046b0d547ea9..6d9598bc10618 100644 --- a/c_glib/arrow-glib/chunked-array.cpp +++ b/c_glib/arrow-glib/chunked-array.cpp @@ -302,7 +302,8 @@ garrow_chunked_array_slice(GArrowChunkedArray *chunked_array, * @chunked_array: A #GArrowChunkedArray. * @error: (nullable): Return location for a #GError or %NULL. * - * Returns: (nullable): The formatted chunked array content or %NULL on error. + * Returns: (nullable) (transfer full): + * The formatted chunked array content or %NULL on error. * * The returned string should be freed when with g_free() when no * longer needed. diff --git a/c_glib/arrow-glib/codec.cpp b/c_glib/arrow-glib/codec.cpp index 45863878e9c7e..7f06fabde74e8 100644 --- a/c_glib/arrow-glib/codec.cpp +++ b/c_glib/arrow-glib/codec.cpp @@ -119,7 +119,7 @@ garrow_codec_class_init(GArrowCodecClass *klass) /** * garrow_codec_new: - * @type: A #GArrowCodompressionType. + * @type: A #GArrowCompressionType. * @error: (nullable): Return location for a #GError or %NULL. * * Returns: A newly created #GArrowCodec on success, %NULL on error. diff --git a/c_glib/arrow-glib/column.cpp b/c_glib/arrow-glib/column.cpp index 06ab0b70de407..68694b3d67903 100644 --- a/c_glib/arrow-glib/column.cpp +++ b/c_glib/arrow-glib/column.cpp @@ -322,7 +322,10 @@ garrow_column_get_field(GArrowColumn *column) } else { const auto arrow_column = garrow_column_get_raw(column); auto arrow_field = arrow_column->field(); - return garrow_field_new_raw(&arrow_field); + auto data_type = garrow_column_get_data_type(column); + auto field = garrow_field_new_raw(&arrow_field, data_type); + g_object_unref(data_type); + return field; } } @@ -372,7 +375,8 @@ garrow_column_get_data(GArrowColumn *column) * @column: A #GArrowColumn. * @error: (nullable): Return location for a #GError or %NULL. * - * Returns: (nullable): The formatted column content or %NULL on error. + * Returns: (nullable) (transfer full): + * The formatted column content or %NULL on error. * * The returned string should be freed when with g_free() when no * longer needed. diff --git a/c_glib/arrow-glib/composite-array.cpp b/c_glib/arrow-glib/composite-array.cpp index b040ac72c7402..bff1858e8554d 100644 --- a/c_glib/arrow-glib/composite-array.cpp +++ b/c_glib/arrow-glib/composite-array.cpp @@ -41,10 +41,18 @@ G_BEGIN_DECLS * use #GArrowListArrayBuilder to create a new array. * * #GArrowStructArray is a class for struct array. It can store zero - * or more structs. One struct has zero or more fields. If you don't + * or more structs. One struct has one or more fields. If you don't * have Arrow format data, you need to use #GArrowStructArrayBuilder * to create a new array. * + * #GArrowUnionArray is a base class for union array. It can store + * zero or more unions. One union has one or more fields but one union + * can store only one field value. + * + * #GArrowDenseUnionArray is a class for dense union array. + * + * #GArrowSparseUnionArray is a class for sparse union array. + * * #GArrowDictionaryArray is a class for dictionary array. It can * store data with dictionary and indices. It's space effective than * normal array when the array has many same values. 
You can convert a @@ -159,7 +167,7 @@ garrow_struct_array_class_init(GArrowStructArrayClass *klass) * garrow_struct_array_new: * @data_type: The data type of the struct. * @length: The number of elements. - * @children: (element-type GArrowArray): The arrays for each field + * @fields: (element-type GArrowArray): The arrays for each field * as #GList of #GArrowArray. * @null_bitmap: (nullable): The bitmap that shows null elements. The * N-th element is null when the N-th bit is 0, not null otherwise. @@ -175,21 +183,21 @@ garrow_struct_array_class_init(GArrowStructArrayClass *klass) GArrowStructArray * garrow_struct_array_new(GArrowDataType *data_type, gint64 length, - GList *children, + GList *fields, GArrowBuffer *null_bitmap, gint64 n_nulls) { const auto arrow_data_type = garrow_data_type_get_raw(data_type); - std::vector<std::shared_ptr<arrow::Array>> arrow_children; - for (GList *node = children; node; node = node->next) { - GArrowArray *child = GARROW_ARRAY(node->data); - arrow_children.push_back(garrow_array_get_raw(child)); + std::vector<std::shared_ptr<arrow::Array>> arrow_fields; + for (auto node = fields; node; node = node->next) { + auto child = GARROW_ARRAY(node->data); + arrow_fields.push_back(garrow_array_get_raw(child)); } const auto arrow_bitmap = garrow_buffer_get_raw(null_bitmap); auto arrow_struct_array = std::make_shared<arrow::StructArray>(arrow_data_type, length, - arrow_children, + arrow_fields, arrow_bitmap, n_nulls); auto arrow_array = @@ -264,6 +272,153 @@ garrow_struct_array_flatten(GArrowStructArray *array, GError **error) } +G_DEFINE_TYPE(GArrowUnionArray, + garrow_union_array, + GARROW_TYPE_ARRAY) + +static void +garrow_union_array_init(GArrowUnionArray *object) +{ +} + +static void +garrow_union_array_class_init(GArrowUnionArrayClass *klass) +{ +} + +/** + * garrow_union_array_get_field: + * @array: A #GArrowUnionArray. + * @i: The index of the field in the union. + * + * Returns: (nullable) (transfer full): The i-th field values as a + * #GArrowArray or %NULL on out of range. + */ +GArrowArray * +garrow_union_array_get_field(GArrowUnionArray *array, + gint i) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto arrow_union_array = + std::static_pointer_cast<arrow::UnionArray>(arrow_array); + auto n_fields = arrow_array->num_fields(); + if (i < 0) { + i += n_fields; + } + if (i < 0) { + return NULL; + } + if (i >= n_fields) { + return NULL; + } + auto arrow_field_array = arrow_union_array->child(i); + return garrow_array_new_raw(&arrow_field_array); +} + + +G_DEFINE_TYPE(GArrowSparseUnionArray, + garrow_sparse_union_array, + GARROW_TYPE_UNION_ARRAY) + +static void +garrow_sparse_union_array_init(GArrowSparseUnionArray *object) +{ +} + +static void +garrow_sparse_union_array_class_init(GArrowSparseUnionArrayClass *klass) +{ +} + +/** + * garrow_sparse_union_array_new: + * @type_ids: The field type IDs for each value as #GArrowInt8Array. + * @fields: (element-type GArrowArray): The arrays for each field + * as #GList of #GArrowArray. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowSparseUnionArray + * or %NULL on error.
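+ *
+ * An illustrative sketch (editor's example, not part of this patch;
+ * `type_ids`, `int_field_array` and `string_field_array` are assumed
+ * to be built elsewhere and to have matching lengths):
+ * |[
+ * GList *fields = NULL;
+ * fields = g_list_append(fields, int_field_array);
+ * fields = g_list_append(fields, string_field_array);
+ * GArrowSparseUnionArray *union_array =
+ *   garrow_sparse_union_array_new(type_ids, fields, &error);
+ * g_list_free(fields);
+ * ]|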
+ * + * Since: 0.12.0 + */ +GArrowSparseUnionArray * +garrow_sparse_union_array_new(GArrowInt8Array *type_ids, + GList *fields, + GError **error) +{ + auto arrow_type_ids = garrow_array_get_raw(GARROW_ARRAY(type_ids)); + std::vector<std::shared_ptr<arrow::Array>> arrow_fields; + for (auto node = fields; node; node = node->next) { + auto *field = GARROW_ARRAY(node->data); + arrow_fields.push_back(garrow_array_get_raw(field)); + } + std::shared_ptr<arrow::Array> arrow_union_array; + auto status = arrow::UnionArray::MakeSparse(*arrow_type_ids, + arrow_fields, + &arrow_union_array); + if (garrow_error_check(error, status, "[sparse-union-array][new]")) { + return GARROW_SPARSE_UNION_ARRAY(garrow_array_new_raw(&arrow_union_array)); + } else { + return NULL; + } +} + + +G_DEFINE_TYPE(GArrowDenseUnionArray, + garrow_dense_union_array, + GARROW_TYPE_UNION_ARRAY) + +static void +garrow_dense_union_array_init(GArrowDenseUnionArray *object) +{ +} + +static void +garrow_dense_union_array_class_init(GArrowDenseUnionArrayClass *klass) +{ +} + +/** + * garrow_dense_union_array_new: + * @type_ids: The field type IDs for each value as #GArrowInt8Array. + * @value_offsets: The value offsets for each value as #GArrowInt32Array. + * Each offset is counted for each type. + * @fields: (element-type GArrowArray): The arrays for each field + * as #GList of #GArrowArray. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowDenseUnionArray + * or %NULL on error. + * + * Since: 0.12.0 + */ +GArrowDenseUnionArray * +garrow_dense_union_array_new(GArrowInt8Array *type_ids, + GArrowInt32Array *value_offsets, + GList *fields, + GError **error) +{ + auto arrow_type_ids = garrow_array_get_raw(GARROW_ARRAY(type_ids)); + auto arrow_value_offsets = garrow_array_get_raw(GARROW_ARRAY(value_offsets)); + std::vector<std::shared_ptr<arrow::Array>> arrow_fields; + for (auto node = fields; node; node = node->next) { + auto *field = GARROW_ARRAY(node->data); + arrow_fields.push_back(garrow_array_get_raw(field)); + } + std::shared_ptr<arrow::Array> arrow_union_array; + auto status = arrow::UnionArray::MakeDense(*arrow_type_ids, + *arrow_value_offsets, + arrow_fields, + &arrow_union_array); + if (garrow_error_check(error, status, "[dense-union-array][new]")) { + return GARROW_DENSE_UNION_ARRAY(garrow_array_new_raw(&arrow_union_array)); + } else { + return NULL; + } +} + + G_DEFINE_TYPE(GArrowDictionaryArray, garrow_dictionary_array, GARROW_TYPE_ARRAY) diff --git a/c_glib/arrow-glib/composite-array.h b/c_glib/arrow-glib/composite-array.h index ad6ad53ff9fc8..10432e2e56ba3 100644 --- a/c_glib/arrow-glib/composite-array.h +++ b/c_glib/arrow-glib/composite-array.h @@ -123,20 +123,72 @@ GType garrow_struct_array_get_type(void) G_GNUC_CONST; GArrowStructArray *garrow_struct_array_new(GArrowDataType *data_type, gint64 length, - GList *children, + GList *fields, GArrowBuffer *null_bitmap, gint64 n_nulls); GArrowArray *garrow_struct_array_get_field(GArrowStructArray *array, gint i); +#ifndef GARROW_DISABLE_DEPRECATED GARROW_DEPRECATED_IN_0_10_FOR(garrow_struct_array_flatten) GList *garrow_struct_array_get_fields(GArrowStructArray *array); +#endif GARROW_AVAILABLE_IN_0_10 GList *garrow_struct_array_flatten(GArrowStructArray *array, GError **error); +#define GARROW_TYPE_UNION_ARRAY (garrow_union_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowUnionArray, + garrow_union_array, + GARROW, + UNION_ARRAY, + GArrowArray) +struct _GArrowUnionArrayClass +{ + GArrowArrayClass parent_class; +}; + +GArrowArray * +garrow_union_array_get_field(GArrowUnionArray *array, + gint i); +
+#define GARROW_TYPE_SPARSE_UNION_ARRAY (garrow_sparse_union_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowSparseUnionArray, + garrow_sparse_union_array, + GARROW, + SPARSE_UNION_ARRAY, + GArrowUnionArray) +struct _GArrowSparseUnionArrayClass +{ + GArrowUnionArrayClass parent_class; +}; + +GArrowSparseUnionArray * +garrow_sparse_union_array_new(GArrowInt8Array *type_ids, + GList *fields, + GError **error); + + +#define GARROW_TYPE_DENSE_UNION_ARRAY (garrow_dense_union_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDenseUnionArray, + garrow_dense_union_array, + GARROW, + DENSE_UNION_ARRAY, + GArrowUnionArray) +struct _GArrowDenseUnionArrayClass +{ + GArrowUnionArrayClass parent_class; +}; + +GArrowDenseUnionArray * +garrow_dense_union_array_new(GArrowInt8Array *type_ids, + GArrowInt32Array *value_offsets, + GList *fields, + GError **error); + + #define GARROW_TYPE_DICTIONARY_ARRAY (garrow_dictionary_array_get_type()) G_DECLARE_DERIVABLE_TYPE(GArrowDictionaryArray, garrow_dictionary_array, diff --git a/c_glib/arrow-glib/composite-data-type.cpp b/c_glib/arrow-glib/composite-data-type.cpp index 2645bead4010e..675900a5becc2 100644 --- a/c_glib/arrow-glib/composite-data-type.cpp +++ b/c_glib/arrow-glib/composite-data-type.cpp @@ -40,6 +40,12 @@ G_BEGIN_DECLS * * #GArrowStructDataType is a class for struct data type. * + * #GArrowUnionDataType is a base class for union data types. + * + * #GArrowSparseUnionDataType is a class for sparse union data type. + * + * #GArrowDenseUnionDataType is a class for dense union data type. + * * #GArrowDictionaryDataType is a class for dictionary data type. */ @@ -82,19 +88,34 @@ garrow_list_data_type_new(GArrowField *field) * @list_data_type: A #GArrowListDataType. * * Returns: (transfer full): The field of value. + * + * Deprecated: 0.13.0: + * Use garrow_list_data_type_get_field() instead. */ GArrowField * garrow_list_data_type_get_value_field(GArrowListDataType *list_data_type) { - auto arrow_data_type = - garrow_data_type_get_raw(GARROW_DATA_TYPE(list_data_type)); + return garrow_list_data_type_get_field(list_data_type); +} + +/** + * garrow_list_data_type_get_field: + * @list_data_type: A #GArrowListDataType. + * + * Returns: (transfer full): The field of value. + * + * Since: 0.13.0 + */ +GArrowField * +garrow_list_data_type_get_field(GArrowListDataType *list_data_type) +{ + auto data_type = GARROW_DATA_TYPE(list_data_type); + auto arrow_data_type = garrow_data_type_get_raw(data_type); auto arrow_list_data_type = static_cast<arrow::ListType *>(arrow_data_type.get()); auto arrow_field = arrow_list_data_type->value_field(); - auto field = garrow_field_new_raw(&arrow_field); - - return field; + return garrow_field_new_raw(&arrow_field, nullptr); } @@ -122,38 +143,37 @@ GArrowStructDataType * garrow_struct_data_type_new(GList *fields) { std::vector<std::shared_ptr<arrow::Field>> arrow_fields; - for (GList *node = fields; node; node = g_list_next(node)) { + for (auto *node = fields; node; node = g_list_next(node)) { auto field = GARROW_FIELD(node->data); auto arrow_field = garrow_field_get_raw(field); arrow_fields.push_back(arrow_field); } auto arrow_data_type = std::make_shared<arrow::StructType>(arrow_fields); - GArrowStructDataType *data_type = - GARROW_STRUCT_DATA_TYPE(g_object_new(GARROW_TYPE_STRUCT_DATA_TYPE, - "data-type", &arrow_data_type, - NULL)); - return data_type; + auto data_type = g_object_new(GARROW_TYPE_STRUCT_DATA_TYPE, + "data-type", &arrow_data_type, + NULL); + return GARROW_STRUCT_DATA_TYPE(data_type); } /** * garrow_struct_data_type_get_n_fields: - * @data_type: A #GArrowStructDataType.
+ * @struct_data_type: A #GArrowStructDataType. * * Returns: The number of fields of the struct data type. * * Since: 0.12.0 */ gint -garrow_struct_data_type_get_n_fields(GArrowStructDataType *data_type) +garrow_struct_data_type_get_n_fields(GArrowStructDataType *struct_data_type) { - auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(struct_data_type)); return arrow_data_type->num_children(); } /** * garrow_struct_data_type_get_fields: - * @data_type: A #GArrowStructDataType. + * @struct_data_type: A #GArrowStructDataType. * * Returns: (transfer full) (element-type GArrowField): * The fields of the struct data type. @@ -161,21 +181,22 @@ garrow_struct_data_type_get_n_fields(GArrowStructDataType *data_type) * Since: 0.12.0 */ GList * -garrow_struct_data_type_get_fields(GArrowStructDataType *data_type) +garrow_struct_data_type_get_fields(GArrowStructDataType *struct_data_type) { - auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto data_type = GARROW_DATA_TYPE(struct_data_type); + auto arrow_data_type = garrow_data_type_get_raw(data_type); auto arrow_fields = arrow_data_type->children(); GList *fields = NULL; for (auto arrow_field : arrow_fields) { - fields = g_list_prepend(fields, garrow_field_new_raw(&arrow_field)); + fields = g_list_prepend(fields, garrow_field_new_raw(&arrow_field, nullptr)); } return g_list_reverse(fields); } /** * garrow_struct_data_type_get_field: - * @data_type: A #GArrowStructDataType. + * @struct_data_type: A #GArrowStructDataType. * @i: The index of the target field. * * Returns: (transfer full) (nullable): @@ -184,21 +205,25 @@ garrow_struct_data_type_get_fields(GArrowStructDataType *data_type) * Since: 0.12.0 */ GArrowField * -garrow_struct_data_type_get_field(GArrowStructDataType *data_type, +garrow_struct_data_type_get_field(GArrowStructDataType *struct_data_type, gint i) { - auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto data_type = GARROW_DATA_TYPE(struct_data_type); + auto arrow_data_type = garrow_data_type_get_raw(data_type); - while (i < 0) { + if (i < 0) { i += arrow_data_type->num_children(); } + if (i < 0) { + return NULL; + } if (i >= arrow_data_type->num_children()) { return NULL; } auto arrow_field = arrow_data_type->child(i); if (arrow_field) { - return garrow_field_new_raw(&arrow_field); + return garrow_field_new_raw(&arrow_field, nullptr); } else { return NULL; } @@ -206,7 +231,7 @@ garrow_struct_data_type_get_field(GArrowStructDataType *data_type, /** * garrow_struct_data_type_get_field_by_name: - * @data_type: A #GArrowStructDataType. + * @struct_data_type: A #GArrowStructDataType. * @name: The name of the target field. 
* * Returns: (transfer full) (nullable): @@ -215,16 +240,17 @@ garrow_struct_data_type_get_field(GArrowStructDataType *data_type, * Since: 0.12.0 */ GArrowField * -garrow_struct_data_type_get_field_by_name(GArrowStructDataType *data_type, +garrow_struct_data_type_get_field_by_name(GArrowStructDataType *struct_data_type, const gchar *name) { - auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto data_type = GARROW_DATA_TYPE(struct_data_type); + auto arrow_data_type = garrow_data_type_get_raw(data_type); auto arrow_struct_data_type = std::static_pointer_cast<arrow::StructType>(arrow_data_type); - auto arrow_field = arrow_struct_data_type->GetChildByName(name); + auto arrow_field = arrow_struct_data_type->GetFieldByName(name); if (arrow_field) { - return garrow_field_new_raw(&arrow_field); + return garrow_field_new_raw(&arrow_field, nullptr); } else { return NULL; } @@ -232,7 +258,7 @@ garrow_struct_data_type_get_field_by_name(GArrowStructDataType *data_type, /** * garrow_struct_data_type_get_field_index: - * @data_type: A #GArrowStructDataType. + * @struct_data_type: A #GArrowStructDataType. * @name: The name of the target field. * * Returns: The index of the target field in the struct data type @@ -241,14 +267,232 @@ garrow_struct_data_type_get_field_by_name(GArrowStructDataType *data_type, * Since: 0.12.0 */ gint -garrow_struct_data_type_get_field_index(GArrowStructDataType *data_type, +garrow_struct_data_type_get_field_index(GArrowStructDataType *struct_data_type, const gchar *name) { - auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(struct_data_type)); auto arrow_struct_data_type = std::static_pointer_cast<arrow::StructType>(arrow_data_type); - return arrow_struct_data_type->GetChildIndex(name); + return arrow_struct_data_type->GetFieldIndex(name); +} + + +G_DEFINE_ABSTRACT_TYPE(GArrowUnionDataType, + garrow_union_data_type, + GARROW_TYPE_DATA_TYPE) + +static void +garrow_union_data_type_init(GArrowUnionDataType *object) +{ +} + +static void +garrow_union_data_type_class_init(GArrowUnionDataTypeClass *klass) +{ +} + +/** + * garrow_union_data_type_get_n_fields: + * @union_data_type: A #GArrowUnionDataType. + * + * Returns: The number of fields of the union data type. + * + * Since: 0.12.0 + */ +gint +garrow_union_data_type_get_n_fields(GArrowUnionDataType *union_data_type) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(union_data_type)); + return arrow_data_type->num_children(); +} + +/** + * garrow_union_data_type_get_fields: + * @union_data_type: A #GArrowUnionDataType. + * + * Returns: (transfer full) (element-type GArrowField): + * The fields of the union data type. + * + * Since: 0.12.0 + */ +GList * +garrow_union_data_type_get_fields(GArrowUnionDataType *union_data_type) +{ + auto data_type = GARROW_DATA_TYPE(union_data_type); + auto arrow_data_type = garrow_data_type_get_raw(data_type); + auto arrow_fields = arrow_data_type->children(); + + GList *fields = NULL; + for (auto arrow_field : arrow_fields) { + fields = g_list_prepend(fields, garrow_field_new_raw(&arrow_field, nullptr)); + } + return g_list_reverse(fields); + +} + +/** + * garrow_union_data_type_get_field: + * @union_data_type: A #GArrowUnionDataType. + * @i: The index of the target field. + * + * Returns: (transfer full) (nullable): + * The field at the index in the union data type or %NULL on not found.
+ * + * Since: 0.12.0 + */ +GArrowField * +garrow_union_data_type_get_field(GArrowUnionDataType *union_data_type, + gint i) +{ + auto data_type = GARROW_DATA_TYPE(union_data_type); + auto arrow_data_type = garrow_data_type_get_raw(data_type); + + if (i < 0) { + i += arrow_data_type->num_children(); + } + if (i < 0) { + return NULL; + } + if (i >= arrow_data_type->num_children()) { + return NULL; + } + + auto arrow_field = arrow_data_type->child(i); + if (arrow_field) { + return garrow_field_new_raw(&arrow_field, nullptr); + } else { + return NULL; + } +} + +/** + * garrow_union_data_type_get_type_codes: + * @union_data_type: A #GArrowUnionDataType. + * @n_type_codes: (out): The number of type codes. + * + * Returns: (transfer full) (array length=n_type_codes): + * The codes for each field. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 0.12.0 + */ +guint8 * +garrow_union_data_type_get_type_codes(GArrowUnionDataType *union_data_type, + gsize *n_type_codes) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(union_data_type)); + auto arrow_union_data_type = + std::static_pointer_cast<arrow::UnionType>(arrow_data_type); + + const auto arrow_type_codes = arrow_union_data_type->type_codes(); + const auto n = arrow_type_codes.size(); + auto type_codes = static_cast<guint8 *>(g_new(guint8, n)); + for (size_t i = 0; i < n; ++i) { + type_codes[i] = arrow_type_codes[i]; + } + *n_type_codes = n; + return type_codes; +} + + +G_DEFINE_TYPE(GArrowSparseUnionDataType, + garrow_sparse_union_data_type, + GARROW_TYPE_UNION_DATA_TYPE) + +static void +garrow_sparse_union_data_type_init(GArrowSparseUnionDataType *object) +{ +} + +static void +garrow_sparse_union_data_type_class_init(GArrowSparseUnionDataTypeClass *klass) +{ +} + +/** + * garrow_sparse_union_data_type_new: + * @fields: (element-type GArrowField): The fields of the union. + * @type_codes: (array length=n_type_codes): The codes to specify each field. + * @n_type_codes: The number of type codes. + * + * Returns: The newly created sparse union data type. + */ +GArrowSparseUnionDataType * +garrow_sparse_union_data_type_new(GList *fields, + guint8 *type_codes, + gsize n_type_codes) +{ + std::vector<std::shared_ptr<arrow::Field>> arrow_fields; + for (auto node = fields; node; node = g_list_next(node)) { + auto field = GARROW_FIELD(node->data); + auto arrow_field = garrow_field_get_raw(field); + arrow_fields.push_back(arrow_field); + } + + std::vector<uint8_t> arrow_type_codes; + for (gsize i = 0; i < n_type_codes; ++i) { + arrow_type_codes.push_back(type_codes[i]); + } + + auto arrow_data_type = + std::make_shared<arrow::UnionType>(arrow_fields, + arrow_type_codes, + arrow::UnionMode::SPARSE); + auto data_type = g_object_new(GARROW_TYPE_SPARSE_UNION_DATA_TYPE, + "data-type", &arrow_data_type, + NULL); + return GARROW_SPARSE_UNION_DATA_TYPE(data_type); +} + + +G_DEFINE_TYPE(GArrowDenseUnionDataType, + garrow_dense_union_data_type, + GARROW_TYPE_UNION_DATA_TYPE) + +static void +garrow_dense_union_data_type_init(GArrowDenseUnionDataType *object) +{ +} + +static void +garrow_dense_union_data_type_class_init(GArrowDenseUnionDataTypeClass *klass) +{ +} + +/** + * garrow_dense_union_data_type_new: + * @fields: (element-type GArrowField): The fields of the union. + * @type_codes: (array length=n_type_codes): The codes to specify each field. + * @n_type_codes: The number of type codes. + * + * Returns: The newly created dense union data type.
+ */ +GArrowDenseUnionDataType * +garrow_dense_union_data_type_new(GList *fields, + guint8 *type_codes, + gsize n_type_codes) +{ + std::vector<std::shared_ptr<arrow::Field>> arrow_fields; + for (auto node = fields; node; node = g_list_next(node)) { + auto field = GARROW_FIELD(node->data); + auto arrow_field = garrow_field_get_raw(field); + arrow_fields.push_back(arrow_field); + } + + std::vector<uint8_t> arrow_type_codes; + for (gsize i = 0; i < n_type_codes; ++i) { + arrow_type_codes.push_back(type_codes[i]); + } + + auto arrow_data_type = + std::make_shared<arrow::UnionType>(arrow_fields, + arrow_type_codes, + arrow::UnionMode::DENSE); + auto data_type = g_object_new(GARROW_TYPE_DENSE_UNION_DATA_TYPE, + "data-type", &arrow_data_type, + NULL); + return GARROW_DENSE_UNION_DATA_TYPE(data_type); } @@ -291,16 +535,16 @@ garrow_dictionary_data_type_new(GArrowDataType *index_data_type, /** * garrow_dictionary_data_type_get_index_data_type: - * @data_type: The #GArrowDictionaryDataType. + * @dictionary_data_type: The #GArrowDictionaryDataType. * * Returns: (transfer full): The #GArrowDataType of index. * * Since: 0.8.0 */ GArrowDataType * -garrow_dictionary_data_type_get_index_data_type(GArrowDictionaryDataType *data_type) +garrow_dictionary_data_type_get_index_data_type(GArrowDictionaryDataType *dictionary_data_type) { - auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(dictionary_data_type)); auto arrow_dictionary_data_type = std::static_pointer_cast<arrow::DictionaryType>(arrow_data_type); auto arrow_index_data_type = arrow_dictionary_data_type->index_type(); @@ -309,16 +553,16 @@ garrow_dictionary_data_type_get_index_data_type(GArrowDictionaryDataType *data_t /** * garrow_dictionary_data_type_get_dictionary: - * @data_type: The #GArrowDictionaryDataType. + * @dictionary_data_type: The #GArrowDictionaryDataType. * * Returns: (transfer full): The dictionary as #GArrowArray. * * Since: 0.8.0 */ GArrowArray * -garrow_dictionary_data_type_get_dictionary(GArrowDictionaryDataType *data_type) +garrow_dictionary_data_type_get_dictionary(GArrowDictionaryDataType *dictionary_data_type) { - auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(dictionary_data_type)); auto arrow_dictionary_data_type = std::static_pointer_cast<arrow::DictionaryType>(arrow_data_type); auto arrow_dictionary = arrow_dictionary_data_type->dictionary(); @@ -327,16 +571,16 @@ garrow_dictionary_data_type_get_dictionary(GArrowDictionaryDataType *data_type) /** * garrow_dictionary_data_type_is_ordered: - * @data_type: The #GArrowDictionaryDataType. + * @dictionary_data_type: The #GArrowDictionaryDataType. * * Returns: Whether dictionary contents are ordered or not.
* * Since: 0.8.0 */ gboolean -garrow_dictionary_data_type_is_ordered(GArrowDictionaryDataType *data_type) +garrow_dictionary_data_type_is_ordered(GArrowDictionaryDataType *dictionary_data_type) { - auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(dictionary_data_type)); auto arrow_dictionary_data_type = std::static_pointer_cast<arrow::DictionaryType>(arrow_data_type); return arrow_dictionary_data_type->ordered(); diff --git a/c_glib/arrow-glib/composite-data-type.h b/c_glib/arrow-glib/composite-data-type.h index 7d6a02b1c77d9..beb312597d52b 100644 --- a/c_glib/arrow-glib/composite-data-type.h +++ b/c_glib/arrow-glib/composite-data-type.h @@ -22,6 +22,7 @@ #include #include #include +#include G_BEGIN_DECLS @@ -67,7 +68,12 @@ struct _GArrowListDataTypeClass GType garrow_list_data_type_get_type (void) G_GNUC_CONST; GArrowListDataType *garrow_list_data_type_new (GArrowField *field); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_13_FOR(garrow_list_data_type_get_field) GArrowField *garrow_list_data_type_get_value_field (GArrowListDataType *list_data_type); +#endif +GARROW_AVAILABLE_IN_0_13 +GArrowField *garrow_list_data_type_get_field (GArrowListDataType *list_data_type); #define GARROW_TYPE_STRUCT_DATA_TYPE (garrow_struct_data_type_get_type()) @@ -83,19 +89,79 @@ struct _GArrowStructDataTypeClass GArrowStructDataType *garrow_struct_data_type_new (GList *fields); gint -garrow_struct_data_type_get_n_fields(GArrowStructDataType *data_type); +garrow_struct_data_type_get_n_fields(GArrowStructDataType *struct_data_type); GList * -garrow_struct_data_type_get_fields(GArrowStructDataType *data_type); +garrow_struct_data_type_get_fields(GArrowStructDataType *struct_data_type); GArrowField * -garrow_struct_data_type_get_field(GArrowStructDataType *data_type, +garrow_struct_data_type_get_field(GArrowStructDataType *struct_data_type, gint i); GArrowField * -garrow_struct_data_type_get_field_by_name(GArrowStructDataType *data_type, +garrow_struct_data_type_get_field_by_name(GArrowStructDataType *struct_data_type, const gchar *name); gint -garrow_struct_data_type_get_field_index(GArrowStructDataType *data_type, +garrow_struct_data_type_get_field_index(GArrowStructDataType *struct_data_type, const gchar *name); + +#define GARROW_TYPE_UNION_DATA_TYPE (garrow_union_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowUnionDataType, + garrow_union_data_type, + GARROW, + UNION_DATA_TYPE, + GArrowDataType) +struct _GArrowUnionDataTypeClass +{ + GArrowDataTypeClass parent_class; +}; + +gint +garrow_union_data_type_get_n_fields(GArrowUnionDataType *union_data_type); +GList * +garrow_union_data_type_get_fields(GArrowUnionDataType *union_data_type); +GArrowField * +garrow_union_data_type_get_field(GArrowUnionDataType *union_data_type, + gint i); +guint8 * +garrow_union_data_type_get_type_codes(GArrowUnionDataType *union_data_type, + gsize *n_type_codes); + + +#define GARROW_TYPE_SPARSE_UNION_DATA_TYPE \ + (garrow_sparse_union_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowSparseUnionDataType, + garrow_sparse_union_data_type, + GARROW, + SPARSE_UNION_DATA_TYPE, + GArrowUnionDataType) +struct _GArrowSparseUnionDataTypeClass +{ + GArrowUnionDataTypeClass parent_class; +}; + +GArrowSparseUnionDataType * +garrow_sparse_union_data_type_new(GList *fields, + guint8 *type_codes, + gsize n_type_codes); + + +#define GARROW_TYPE_DENSE_UNION_DATA_TYPE \ + (garrow_dense_union_data_type_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowDenseUnionDataType, + garrow_dense_union_data_type, + GARROW, + DENSE_UNION_DATA_TYPE, + GArrowUnionDataType) +struct _GArrowDenseUnionDataTypeClass +{ + GArrowUnionDataTypeClass parent_class; +}; + +GArrowDenseUnionDataType * +garrow_dense_union_data_type_new(GList *fields, + guint8 *type_codes, + gsize n_type_codes); + + #define GARROW_TYPE_DICTIONARY_DATA_TYPE (garrow_dictionary_data_type_get_type()) G_DECLARE_DERIVABLE_TYPE(GArrowDictionaryDataType, garrow_dictionary_data_type, @@ -112,11 +178,11 @@ garrow_dictionary_data_type_new(GArrowDataType *index_data_type, GArrowArray *dictionary, gboolean ordered); GArrowDataType * -garrow_dictionary_data_type_get_index_data_type(GArrowDictionaryDataType *data_type); +garrow_dictionary_data_type_get_index_data_type(GArrowDictionaryDataType *dictionary_data_type); GArrowArray * -garrow_dictionary_data_type_get_dictionary(GArrowDictionaryDataType *data_type); +garrow_dictionary_data_type_get_dictionary(GArrowDictionaryDataType *dictionary_data_type); gboolean -garrow_dictionary_data_type_is_ordered(GArrowDictionaryDataType *data_type); +garrow_dictionary_data_type_is_ordered(GArrowDictionaryDataType *dictionary_data_type); G_END_DECLS diff --git a/c_glib/arrow-glib/decimal.cpp b/c_glib/arrow-glib/decimal128.cpp similarity index 70% rename from c_glib/arrow-glib/decimal.cpp rename to c_glib/arrow-glib/decimal128.cpp index 67b2d43b1018a..32bdf5fcae6e4 100644 --- a/c_glib/arrow-glib/decimal.cpp +++ b/c_glib/arrow-glib/decimal128.cpp @@ -21,14 +21,14 @@ # include #endif -#include +#include #include G_BEGIN_DECLS /** - * SECTION: decimal - * @title: Decimal classes + * SECTION: decimal128 + * @title: 128-bit decimal class * @include: arrow-glib/arrow-glib.h * * #GArrowDecimal128 is a 128-bit decimal class. @@ -136,14 +136,128 @@ garrow_decimal128_new_integer(const gint64 data) return garrow_decimal128_new_raw(&arrow_decimal); } +/** + * garrow_decimal128_equal: + * @decimal: A #GArrowDecimal128. + * @other_decimal: A #GArrowDecimal128 to be compared. + * + * Returns: %TRUE if the decimal is equal to the other decimal, %FALSE + * otherwise. + * + * Since: 0.12.0 + */ +gboolean +garrow_decimal128_equal(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal) +{ + const auto arrow_decimal = garrow_decimal128_get_raw(decimal); + const auto arrow_other_decimal = garrow_decimal128_get_raw(other_decimal); + return *arrow_decimal == *arrow_other_decimal; +} + +/** + * garrow_decimal128_not_equal: + * @decimal: A #GArrowDecimal128. + * @other_decimal: A #GArrowDecimal128 to be compared. + * + * Returns: %TRUE if the decimal isn't equal to the other decimal, + * %FALSE otherwise. + * + * Since: 0.12.0 + */ +gboolean +garrow_decimal128_not_equal(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal) +{ + const auto arrow_decimal = garrow_decimal128_get_raw(decimal); + const auto arrow_other_decimal = garrow_decimal128_get_raw(other_decimal); + return *arrow_decimal != *arrow_other_decimal; +} + +/** + * garrow_decimal128_less_than: + * @decimal: A #GArrowDecimal128. + * @other_decimal: A #GArrowDecimal128 to be compared. + * + * Returns: %TRUE if the decimal is less than the other decimal, + * %FALSE otherwise. 
+ * + * Since: 0.12.0 + */ +gboolean +garrow_decimal128_less_than(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal) +{ + const auto arrow_decimal = garrow_decimal128_get_raw(decimal); + const auto arrow_other_decimal = garrow_decimal128_get_raw(other_decimal); + return *arrow_decimal < *arrow_other_decimal; +} + +/** + * garrow_decimal128_less_than_or_equal: + * @decimal: A #GArrowDecimal128. + * @other_decimal: A #GArrowDecimal128 to be compared. + * + * Returns: %TRUE if the decimal is less than the other decimal + * or equal to the other decimal, %FALSE otherwise. + * + * Since: 0.12.0 + */ +gboolean +garrow_decimal128_less_than_or_equal(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal) +{ + const auto arrow_decimal = garrow_decimal128_get_raw(decimal); + const auto arrow_other_decimal = garrow_decimal128_get_raw(other_decimal); + return *arrow_decimal <= *arrow_other_decimal; +} + +/** + * garrow_decimal128_greater_than: + * @decimal: A #GArrowDecimal128. + * @other_decimal: A #GArrowDecimal128 to be compared. + * + * Returns: %TRUE if the decimal is greater than the other decimal, + * %FALSE otherwise. + * + * Since: 0.12.0 + */ +gboolean +garrow_decimal128_greater_than(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal) +{ + const auto arrow_decimal = garrow_decimal128_get_raw(decimal); + const auto arrow_other_decimal = garrow_decimal128_get_raw(other_decimal); + return *arrow_decimal > *arrow_other_decimal; +} + +/** + * garrow_decimal128_greater_than_or_equal: + * @decimal: A #GArrowDecimal128. + * @other_decimal: A #GArrowDecimal128 to be compared. + * + * Returns: %TRUE if the decimal is greater than the other decimal + * or equal to the other decimal, %FALSE otherwise. + * + * Since: 0.12.0 + */ +gboolean +garrow_decimal128_greater_than_or_equal(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal) +{ + const auto arrow_decimal = garrow_decimal128_get_raw(decimal); + const auto arrow_other_decimal = garrow_decimal128_get_raw(other_decimal); + return *arrow_decimal >= *arrow_other_decimal; +} + /** * garrow_decimal128_to_string_scale: * @decimal: A #GArrowDecimal128. * @scale: The scale of the decimal. * - * Returns: The string representation of the decimal. + * Returns: (transfer full): The string representation of the decimal. * - * It should be freed with g_free() when no longer needed. + * It should be freed with g_free() when no longer needed. * * Since: 0.10.0 */ @@ -159,9 +273,9 @@ garrow_decimal128_to_string_scale(GArrowDecimal128 *decimal, gint32 scale) * garrow_decimal128_to_string: * @decimal: A #GArrowDecimal128. * - * Returns: The string representation of the decimal. + * Returns: (transfer full): The string representation of the decimal. * - * It should be freed with g_free() when no longer needed. + * It should be freed with g_free() when no longer needed. 
* * Since: 0.10.0 */ diff --git a/c_glib/arrow-glib/decimal.h b/c_glib/arrow-glib/decimal128.h similarity index 69% rename from c_glib/arrow-glib/decimal.h rename to c_glib/arrow-glib/decimal128.h index 918cf3d49b4d2..e7601a457601b 100644 --- a/c_glib/arrow-glib/decimal.h +++ b/c_glib/arrow-glib/decimal128.h @@ -20,6 +20,7 @@ #pragma once #include +#include G_BEGIN_DECLS @@ -37,6 +38,24 @@ struct _GArrowDecimal128Class GArrowDecimal128 *garrow_decimal128_new_string(const gchar *data); GArrowDecimal128 *garrow_decimal128_new_integer(const gint64 data); +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_decimal128_equal(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal); +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_decimal128_not_equal(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal); +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_decimal128_less_than(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal); +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_decimal128_less_than_or_equal(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal); +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_decimal128_greater_than(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal); +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_decimal128_greater_than_or_equal(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal); gchar *garrow_decimal128_to_string_scale(GArrowDecimal128 *decimal, gint32 scale); gchar *garrow_decimal128_to_string(GArrowDecimal128 *decimal); diff --git a/c_glib/arrow-glib/decimal.hpp b/c_glib/arrow-glib/decimal128.hpp similarity index 96% rename from c_glib/arrow-glib/decimal.hpp rename to c_glib/arrow-glib/decimal128.hpp index ce56cfe0bd062..84bf47e409f50 100644 --- a/c_glib/arrow-glib/decimal.hpp +++ b/c_glib/arrow-glib/decimal128.hpp @@ -23,7 +23,7 @@ #include -#include +#include GArrowDecimal128 *garrow_decimal128_new_raw(std::shared_ptr<arrow::Decimal128> *arrow_decimal128); std::shared_ptr<arrow::Decimal128> garrow_decimal128_get_raw(GArrowDecimal128 *decimal); diff --git a/c_glib/arrow-glib/field.cpp b/c_glib/arrow-glib/field.cpp index b989d288ec30f..f7250bc6ee634 100644 --- a/c_glib/arrow-glib/field.cpp +++ b/c_glib/arrow-glib/field.cpp @@ -37,11 +37,12 @@ G_BEGIN_DECLS typedef struct GArrowFieldPrivate_ { std::shared_ptr<arrow::Field> field; + GArrowDataType *data_type; } GArrowFieldPrivate; enum { - PROP_0, - PROP_FIELD + PROP_FIELD = 1, + PROP_DATA_TYPE }; G_DEFINE_TYPE_WITH_PRIVATE(GArrowField, garrow_field, G_TYPE_OBJECT) #define GARROW_FIELD_GET_PRIVATE(obj) \ static_cast<GArrowFieldPrivate *>( \ garrow_field_get_instance_private( \ GARROW_FIELD(obj))) static void -garrow_field_finalize(GObject *object) +garrow_field_dispose(GObject *object) { - GArrowFieldPrivate *priv; + auto priv = GARROW_FIELD_GET_PRIVATE(object); - priv = GARROW_FIELD_GET_PRIVATE(object); + if (priv->data_type) { + g_object_unref(priv->data_type); + priv->data_type = nullptr; + } + + G_OBJECT_CLASS(garrow_field_parent_class)->dispose(object); +} + +static void +garrow_field_finalize(GObject *object) +{ + auto priv = GARROW_FIELD_GET_PRIVATE(object); priv->field = nullptr; @@ -80,19 +92,9 @@ garrow_field_set_property(GObject *object, priv->field = *static_cast<std::shared_ptr<arrow::Field> *>(g_value_get_pointer(value)); break; - default: - G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + case PROP_DATA_TYPE: + priv->data_type = GARROW_DATA_TYPE(g_value_dup_object(value)); break; - } -} - -static void -garrow_field_get_property(GObject *object, - guint prop_id, - GValue *value, - GParamSpec *pspec) -{ - switch (prop_id) { default: G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); break; @@ -107,21 +109,27 @@
garrow_field_init(GArrowField *object) static void garrow_field_class_init(GArrowFieldClass *klass) { - GObjectClass *gobject_class; - GParamSpec *spec; - - gobject_class = G_OBJECT_CLASS(klass); + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->dispose = garrow_field_dispose; gobject_class->finalize = garrow_field_finalize; gobject_class->set_property = garrow_field_set_property; - gobject_class->get_property = garrow_field_get_property; + GParamSpec *spec; spec = g_param_spec_pointer("field", "Field", "The raw std::shared<arrow::Field> *", static_cast<GParamFlags>(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); g_object_class_install_property(gobject_class, PROP_FIELD, spec); + + spec = g_param_spec_object("data-type", + "Data type", + "The data type", + GARROW_TYPE_DATA_TYPE, + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_DATA_TYPE, spec); } /** @@ -137,7 +145,7 @@ garrow_field_new(const gchar *name, { auto arrow_data_type = garrow_data_type_get_raw(data_type); auto arrow_field = std::make_shared<arrow::Field>(name, arrow_data_type); - return garrow_field_new_raw(&arrow_field); + return garrow_field_new_raw(&arrow_field, data_type); } /** @@ -157,7 +165,7 @@ garrow_field_new_full(const gchar *name, std::make_shared<arrow::Field>(name, garrow_data_type_get_raw(data_type), nullable); - return garrow_field_new_raw(&arrow_field); + return garrow_field_new_raw(&arrow_field, data_type); } /** @@ -177,14 +185,13 @@ garrow_field_get_name(GArrowField *field) * garrow_field_get_data_type: * @field: A #GArrowField. * - * Returns: (transfer full): The data type of the field. + * Returns: (transfer none): The data type of the field. */ GArrowDataType * garrow_field_get_data_type(GArrowField *field) { - const auto arrow_field = garrow_field_get_raw(field); - auto type = arrow_field->type(); - return garrow_data_type_new_raw(&type); + auto priv = GARROW_FIELD_GET_PRIVATE(field); + return priv->data_type; } /** @@ -233,11 +240,22 @@ garrow_field_to_string(GArrowField *field) G_END_DECLS GArrowField * -garrow_field_new_raw(std::shared_ptr<arrow::Field> *arrow_field) +garrow_field_new_raw(std::shared_ptr<arrow::Field> *arrow_field, + GArrowDataType *data_type) { + bool data_type_need_unref = false; + if (!data_type) { + auto arrow_data_type = (*arrow_field)->type(); + data_type = garrow_data_type_new_raw(&arrow_data_type); + data_type_need_unref = true; + } auto field = GARROW_FIELD(g_object_new(GARROW_TYPE_FIELD, "field", arrow_field, + "data-type", data_type, NULL)); + if (data_type_need_unref) { + g_object_unref(data_type); + } return field; } diff --git a/c_glib/arrow-glib/field.hpp b/c_glib/arrow-glib/field.hpp index e130ad5992409..f8d0d46c97ab4 100644 --- a/c_glib/arrow-glib/field.hpp +++ b/c_glib/arrow-glib/field.hpp @@ -23,5 +23,6 @@ #include -GArrowField *garrow_field_new_raw(std::shared_ptr<arrow::Field> *arrow_field); +GArrowField *garrow_field_new_raw(std::shared_ptr<arrow::Field> *arrow_field, + GArrowDataType *data_type); std::shared_ptr<arrow::Field> garrow_field_get_raw(GArrowField *field); diff --git a/c_glib/arrow-glib/input-stream.cpp b/c_glib/arrow-glib/input-stream.cpp index cb36e49067ac9..cb1fb3b04a68e 100644 --- a/c_glib/arrow-glib/input-stream.cpp +++ b/c_glib/arrow-glib/input-stream.cpp @@ -325,6 +325,30 @@ garrow_seekable_input_stream_read_at(GArrowSeekableInputStream *input_stream, }
The data becomes invalid after any operation on + * the stream. If the stream is unbuffered, the data is empty. + * + * It should be freed with g_bytes_unref() when no longer needed. + * + * Since: 0.12.0 + */ +GBytes * +garrow_seekable_input_stream_peek(GArrowSeekableInputStream *input_stream, + gint64 n_bytes) +{ + auto arrow_random_access_file = + garrow_seekable_input_stream_get_raw(input_stream); + auto string_view = arrow_random_access_file->Peek(n_bytes); + return g_bytes_new_static(string_view.data(), string_view.size()); +} + + typedef struct GArrowBufferInputStreamPrivate_ { GArrowBuffer *buffer; } GArrowBufferInputStreamPrivate; diff --git a/c_glib/arrow-glib/input-stream.h b/c_glib/arrow-glib/input-stream.h index 9deebd717363b..745b912749eb6 100644 --- a/c_glib/arrow-glib/input-stream.h +++ b/c_glib/arrow-glib/input-stream.h @@ -66,6 +66,9 @@ GArrowBuffer *garrow_seekable_input_stream_read_at(GArrowSeekableInputStream *in gint64 position, gint64 n_bytes, GError **error); +GARROW_AVAILABLE_IN_0_12 +GBytes *garrow_seekable_input_stream_peek(GArrowSeekableInputStream *input_stream, + gint64 n_bytes); #define GARROW_TYPE_BUFFER_INPUT_STREAM \ diff --git a/c_glib/arrow-glib/meson.build b/c_glib/arrow-glib/meson.build index d962ec103175e..14126bee8d784 100644 --- a/c_glib/arrow-glib/meson.build +++ b/c_glib/arrow-glib/meson.build @@ -27,7 +27,7 @@ sources = files( 'column.cpp', 'composite-array.cpp', 'composite-data-type.cpp', - 'decimal.cpp', + 'decimal128.cpp', 'error.cpp', 'field.cpp', 'record-batch.cpp', @@ -77,7 +77,7 @@ c_headers = files( 'composite-array.h', 'composite-data-type.h', 'data-type.h', - 'decimal.h', + 'decimal128.h', 'error.h', 'field.h', 'gobject-type.h', @@ -128,7 +128,7 @@ cpp_headers = files( 'codec.hpp', 'column.hpp', 'data-type.hpp', - 'decimal.hpp', + 'decimal128.hpp', 'error.hpp', 'field.hpp', 'record-batch.hpp', diff --git a/c_glib/arrow-glib/orc-file-reader.cpp b/c_glib/arrow-glib/orc-file-reader.cpp index bde3cfc8fa04f..31905a2f9fea1 100644 --- a/c_glib/arrow-glib/orc-file-reader.cpp +++ b/c_glib/arrow-glib/orc-file-reader.cpp @@ -199,8 +199,7 @@ garrow_orc_file_reader_new(GArrowSeekableInputStream *input, * Since: 0.10.0 * * Deprecated: 0.12.0: - * Use garrow_orc_file_reader_set_field_indices() instead. - * + * Use garrow_orc_file_reader_set_field_indices() instead. 
*/ void garrow_orc_file_reader_set_field_indexes(GArrowORCFileReader *reader, diff --git a/c_glib/arrow-glib/orc-file-reader.h b/c_glib/arrow-glib/orc-file-reader.h index 9b2dbadefe43a..9551d52e0fd55 100644 --- a/c_glib/arrow-glib/orc-file-reader.h +++ b/c_glib/arrow-glib/orc-file-reader.h @@ -39,22 +39,24 @@ garrow_orc_file_reader_new(GArrowSeekableInputStream *file, GError **error); #ifndef GARROW_DISABLE_DEPRECATED -G_GNUC_DEPRECATED_FOR(garrow_orc_file_reader_set_field_indices) +GARROW_DEPRECATED_IN_0_12_FOR(garrow_orc_file_reader_set_field_indices) void garrow_orc_file_reader_set_field_indexes(GArrowORCFileReader *reader, const gint *field_indexes, guint n_field_indexes); #endif +GARROW_AVAILABLE_IN_0_12 void garrow_orc_file_reader_set_field_indices(GArrowORCFileReader *reader, const gint *field_indices, guint n_field_indices); #ifndef GARROW_DISABLE_DEPRECATED -G_GNUC_DEPRECATED_FOR(garrow_orc_file_reader_get_field_indices) +GARROW_DEPRECATED_IN_0_12_FOR(garrow_orc_file_reader_get_field_indices) const gint * garrow_orc_file_reader_get_field_indexes(GArrowORCFileReader *reader, guint *n_field_indexes); #endif +GARROW_AVAILABLE_IN_0_12 const gint * garrow_orc_file_reader_get_field_indices(GArrowORCFileReader *reader, guint *n_field_indices); diff --git a/c_glib/arrow-glib/reader.cpp b/c_glib/arrow-glib/reader.cpp index c6c96670ba4b6..b4afde31406d3 100644 --- a/c_glib/arrow-glib/reader.cpp +++ b/c_glib/arrow-glib/reader.cpp @@ -645,9 +645,11 @@ garrow_feather_file_reader_new(GArrowSeekableInputStream *file, * garrow_feather_file_reader_get_description: * @reader: A #GArrowFeatherFileReader. * - * Returns: (nullable): The description of the file if it exists, + * Returns: (nullable) (transfer full): + * The description of the file if it exists, * %NULL otherwise. You can confirm whether the description exists or not by * garrow_feather_file_reader_has_description(). + * * It should be freed with g_free() when no longer needed. * * Since: 0.4.0 @@ -730,7 +732,8 @@ garrow_feather_file_reader_get_n_columns(GArrowFeatherFileReader *reader) * @reader: A #GArrowFeatherFileReader. * @i: The index of the target column. * - * Returns: The i-th column name in the file. + * Returns: (transfer full): The i-th column name in the file. + * * It should be freed with g_free() when no longer needed. * * Since: 0.4.0 diff --git a/c_glib/arrow-glib/record-batch.cpp b/c_glib/arrow-glib/record-batch.cpp index f905b065de6e3..04d442b409a8c 100644 --- a/c_glib/arrow-glib/record-batch.cpp +++ b/c_glib/arrow-glib/record-batch.cpp @@ -331,7 +331,8 @@ garrow_record_batch_slice(GArrowRecordBatch *record_batch, * @record_batch: A #GArrowRecordBatch. * @error: (nullable): Return location for a #GError or %NULL. * - * Returns: (nullable): The formatted record batch content or %NULL on error. + * Returns: (nullable) (transfer full): + * The formatted record batch content or %NULL on error. * * The returned string should be freed with g_free() when no * longer needed.
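The GARROW_DEPRECATED_IN_0_12_FOR()/GARROW_AVAILABLE_IN_0_12 pair above controls what callers see at compile time. A minimal caller sketch, assuming the umbrella arrow-glib.h header and a reader obtained elsewhere (the helper name and index values are hypothetical; the entry points are the ones declared in orc-file-reader.h):

#include <arrow-glib/arrow-glib.h>

/* Hypothetical helper: read only the 2nd and 4th ORC fields. */
static void
select_orc_fields(GArrowORCFileReader *reader)
{
  const gint field_indices[] = {1, 3};
  /* New spelling, annotated GARROW_AVAILABLE_IN_0_12. */
  garrow_orc_file_reader_set_field_indices(reader,
                                           field_indices,
                                           G_N_ELEMENTS(field_indices));
  /* The old garrow_orc_file_reader_set_field_indexes() still compiles,
   * but GARROW_DEPRECATED_IN_0_12_FOR() now emits a deprecation warning,
   * and defining GARROW_DISABLE_DEPRECATED hides it entirely. */
}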
diff --git a/c_glib/arrow-glib/schema.cpp b/c_glib/arrow-glib/schema.cpp index 1affaaede766b..1bbe82f9a3ca6 100644 --- a/c_glib/arrow-glib/schema.cpp +++ b/c_glib/arrow-glib/schema.cpp @@ -21,6 +21,7 @@ # include #endif +#include #include #include #include @@ -173,7 +174,7 @@ garrow_schema_get_field(GArrowSchema *schema, guint i) { const auto arrow_schema = garrow_schema_get_raw(schema); auto arrow_field = arrow_schema->field(i); - return garrow_field_new_raw(&arrow_field); + return garrow_field_new_raw(&arrow_field, nullptr); } /** @@ -192,7 +193,7 @@ garrow_schema_get_field_by_name(GArrowSchema *schema, if (arrow_field == nullptr) { return NULL; } else { - return garrow_field_new_raw(&arrow_field); + return garrow_field_new_raw(&arrow_field, nullptr); } } @@ -223,7 +225,7 @@ garrow_schema_get_fields(GArrowSchema *schema) GList *fields = NULL; for (auto arrow_field : arrow_schema->fields()) { - GArrowField *field = garrow_field_new_raw(&arrow_field); + auto field = garrow_field_new_raw(&arrow_field, nullptr); fields = g_list_prepend(fields, field); } diff --git a/c_glib/arrow-glib/table-builder.cpp b/c_glib/arrow-glib/table-builder.cpp index e87314bf52b9f..5e004a55d8a05 100644 --- a/c_glib/arrow-glib/table-builder.cpp +++ b/c_glib/arrow-glib/table-builder.cpp @@ -41,7 +41,7 @@ G_BEGIN_DECLS typedef struct GArrowRecordBatchBuilderPrivate_ { arrow::RecordBatchBuilder *record_batch_builder; - GPtrArray *fields; + GPtrArray *column_builders; } GArrowRecordBatchBuilderPrivate; enum { @@ -63,13 +63,13 @@ garrow_record_batch_builder_constructed(GObject *object) { auto priv = GARROW_RECORD_BATCH_BUILDER_GET_PRIVATE(object); auto arrow_builder = priv->record_batch_builder; - auto n_fields = arrow_builder->num_fields(); - priv->fields = g_ptr_array_new_full(n_fields, g_object_unref); - for (int i = 0; i < n_fields; ++i) { + auto n_columns = arrow_builder->num_fields(); + priv->column_builders = g_ptr_array_new_full(n_columns, g_object_unref); + for (int i = 0; i < n_columns; ++i) { auto arrow_array_builder = arrow_builder->GetField(i); auto array_builder = garrow_array_builder_new_raw(arrow_array_builder); garrow_array_builder_release_ownership(array_builder); - g_ptr_array_add(priv->fields, array_builder); + g_ptr_array_add(priv->column_builders, array_builder); } G_OBJECT_CLASS(garrow_record_batch_builder_parent_class)->constructed(object); @@ -80,7 +80,7 @@ garrow_record_batch_builder_finalize(GObject *object) { auto priv = GARROW_RECORD_BATCH_BUILDER_GET_PRIVATE(object); - g_ptr_array_free(priv->fields, TRUE); + g_ptr_array_free(priv->column_builders, TRUE); delete priv->record_batch_builder; G_OBJECT_CLASS(garrow_record_batch_builder_parent_class)->finalize(object); @@ -223,9 +223,26 @@ garrow_record_batch_builder_get_schema(GArrowRecordBatchBuilder *builder) * Returns: The number of fields. * * Since: 0.8.0 + * + * Deprecated: 0.13.0: + * Use garrow_record_batch_builder_get_n_columns() instead. */ gint garrow_record_batch_builder_get_n_fields(GArrowRecordBatchBuilder *builder) +{ + return garrow_record_batch_builder_get_n_columns(builder); +} + +/** + * garrow_record_batch_builder_get_n_columns: + * @builder: A #GArrowRecordBatchBuilder. + * + * Returns: The number of columns.
+ * + * Since: 0.13.0 + */ +gint +garrow_record_batch_builder_get_n_columns(GArrowRecordBatchBuilder *builder) { auto arrow_builder = garrow_record_batch_builder_get_raw(builder); return arrow_builder->num_fields(); @@ -241,23 +258,44 @@ garrow_record_batch_builder_get_n_fields(GArrowRecordBatchBuilder *builder) * the `i`-th field on success, %NULL on out of index. * * Since: 0.8.0 + * + * Deprecated: 0.13.0: + * Use garrow_record_batch_builder_get_column_builder() instead. */ GArrowArrayBuilder * garrow_record_batch_builder_get_field(GArrowRecordBatchBuilder *builder, gint i) +{ + return garrow_record_batch_builder_get_column_builder(builder, i); +} + +/** + * garrow_record_batch_builder_get_column_builder: + * @builder: A #GArrowRecordBatchBuilder. + * @i: The column index. If it's negative, index is counted backward + * from the end of the columns. `-1` means the last column. + * + * Returns: (transfer none) (nullable): The #GArrowArrayBuilder for + * the `i`-th column on success, %NULL on out of index. + * + * Since: 0.13.0 + */ +GArrowArrayBuilder * +garrow_record_batch_builder_get_column_builder(GArrowRecordBatchBuilder *builder, + gint i) { auto priv = GARROW_RECORD_BATCH_BUILDER_GET_PRIVATE(builder); if (i < 0) { - i += priv->fields->len; + i += priv->column_builders->len; } if (i < 0) { return NULL; } - if (static_cast<guint>(i) >= priv->fields->len) { + if (static_cast<guint>(i) >= priv->column_builders->len) { return NULL; } - return GARROW_ARRAY_BUILDER(g_ptr_array_index(priv->fields, i)); + return GARROW_ARRAY_BUILDER(g_ptr_array_index(priv->column_builders, i)); } /** diff --git a/c_glib/arrow-glib/table-builder.h b/c_glib/arrow-glib/table-builder.h index d05525e54f52e..a76793953c55d 100644 --- a/c_glib/arrow-glib/table-builder.h +++ b/c_glib/arrow-glib/table-builder.h @@ -45,9 +45,22 @@ void garrow_record_batch_builder_set_initial_capacity(GArrowRecordBatchBuilder * gint64 capacity); GArrowSchema *garrow_record_batch_builder_get_schema(GArrowRecordBatchBuilder *builder); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_13_FOR(garrow_record_batch_builder_get_n_columns) gint garrow_record_batch_builder_get_n_fields(GArrowRecordBatchBuilder *builder); +#endif +GARROW_AVAILABLE_IN_0_13 +gint +garrow_record_batch_builder_get_n_columns(GArrowRecordBatchBuilder *builder); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_13_FOR(garrow_record_batch_builder_get_column_builder) GArrowArrayBuilder *garrow_record_batch_builder_get_field(GArrowRecordBatchBuilder *builder, gint i); +#endif +GARROW_AVAILABLE_IN_0_13 +GArrowArrayBuilder * +garrow_record_batch_builder_get_column_builder(GArrowRecordBatchBuilder *builder, + gint i); GArrowRecordBatch *garrow_record_batch_builder_flush(GArrowRecordBatchBuilder *builder, GError **error); diff --git a/c_glib/arrow-glib/table.cpp b/c_glib/arrow-glib/table.cpp index b4d0d2c6d862f..b889eb2c9da23 100644 --- a/c_glib/arrow-glib/table.cpp +++ b/c_glib/arrow-glib/table.cpp @@ -21,8 +21,10 @@ # include #endif +#include #include #include +#include #include #include @@ -133,22 +135,218 @@ garrow_table_class_init(GArrowTableClass *klass) * @columns: (element-type GArrowColumn): The columns of the table. * * Returns: A newly created #GArrowTable. + * + * Deprecated: 0.12.0: Use garrow_table_new_values() instead.
*/ GArrowTable * garrow_table_new(GArrowSchema *schema, GList *columns) { + auto arrow_schema = garrow_schema_get_raw(schema); std::vector<std::shared_ptr<arrow::Column>> arrow_columns; for (GList *node = columns; node; node = node->next) { - GArrowColumn *column = GARROW_COLUMN(node->data); + auto column = GARROW_COLUMN(node->data); arrow_columns.push_back(garrow_column_get_raw(column)); } - auto arrow_table = - arrow::Table::Make(garrow_schema_get_raw(schema), arrow_columns); + auto arrow_table = arrow::Table::Make(arrow_schema, arrow_columns); return garrow_table_new_raw(&arrow_table); } +/** + * garrow_table_new_values: (skip) + * @schema: The schema of the table. + * @values: The values of the table. All values must be instances of the + * same class. Available classes are #GArrowColumn, #GArrowArray and + * #GArrowRecordBatch. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowTable or %NULL on error. + * + * Since: 0.12.0 + */ +GArrowTable * +garrow_table_new_values(GArrowSchema *schema, + GList *values, + GError **error) +{ + const auto context = "[table][new][values]"; + auto arrow_schema = garrow_schema_get_raw(schema); + std::vector<std::shared_ptr<arrow::Column>> arrow_columns; + std::vector<std::shared_ptr<arrow::Array>> arrow_arrays; + std::vector<std::shared_ptr<arrow::RecordBatch>> arrow_record_batches; + for (GList *node = values; node; node = node->next) { + if (GARROW_IS_COLUMN(node->data)) { + auto column = GARROW_COLUMN(node->data); + arrow_columns.push_back(garrow_column_get_raw(column)); + } else if (GARROW_IS_ARRAY(node->data)) { + auto array = GARROW_ARRAY(node->data); + arrow_arrays.push_back(garrow_array_get_raw(array)); + } else if (GARROW_IS_RECORD_BATCH(node->data)) { + auto record_batch = GARROW_RECORD_BATCH(node->data); + arrow_record_batches.push_back(garrow_record_batch_get_raw(record_batch)); + } else { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "%s: %s", + context, + "value must be one of " + "GArrowColumn, GArrowArray and GArrowRecordBatch"); + return NULL; + } + } + + size_t n_types = 0; + if (!arrow_columns.empty()) { + ++n_types; + } + if (!arrow_arrays.empty()) { + ++n_types; + } + if (!arrow_record_batches.empty()) { + ++n_types; + } + if (n_types > 1) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "%s: %s", + context, + "all values must be the same objects of " + "GArrowColumn, GArrowArray or GArrowRecordBatch"); + return NULL; + } + + if (!arrow_columns.empty()) { + auto arrow_table = arrow::Table::Make(arrow_schema, arrow_columns); + auto status = arrow_table->Validate(); + if (garrow_error_check(error, status, context)) { + return garrow_table_new_raw(&arrow_table); + } else { + return NULL; + } + } else if (!arrow_arrays.empty()) { + auto arrow_table = arrow::Table::Make(arrow_schema, arrow_arrays); + auto status = arrow_table->Validate(); + if (garrow_error_check(error, status, context)) { + return garrow_table_new_raw(&arrow_table); + } else { + return NULL; + } + } else { + std::shared_ptr<arrow::Table> arrow_table; + auto status = arrow::Table::FromRecordBatches(arrow_schema, + arrow_record_batches, + &arrow_table); + if (garrow_error_check(error, status, context)) { + return garrow_table_new_raw(&arrow_table); + } else { + return NULL; + } + } +} + +/** + * garrow_table_new_columns: + * @schema: The schema of the table. + * @columns: (array length=n_columns): The columns of the table. + * @n_columns: The number of columns. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowTable or %NULL on error.
+ * + * Since: 0.12.0 + */ +GArrowTable * +garrow_table_new_columns(GArrowSchema *schema, + GArrowColumn **columns, + gsize n_columns, + GError **error) +{ + auto arrow_schema = garrow_schema_get_raw(schema); + std::vector<std::shared_ptr<arrow::Column>> arrow_columns; + for (gsize i = 0; i < n_columns; ++i) { + arrow_columns.push_back(garrow_column_get_raw(columns[i])); + } + + auto arrow_table = arrow::Table::Make(arrow_schema, arrow_columns); + auto status = arrow_table->Validate(); + if (garrow_error_check(error, status, "[table][new][columns]")) { + return garrow_table_new_raw(&arrow_table); + } else { + return NULL; + } +} + +/** + * garrow_table_new_arrays: + * @schema: The schema of the table. + * @arrays: (array length=n_arrays): The arrays of the table. + * @n_arrays: The number of arrays. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowTable or %NULL on error. + * + * Since: 0.12.0 + */ +GArrowTable * +garrow_table_new_arrays(GArrowSchema *schema, + GArrowArray **arrays, + gsize n_arrays, + GError **error) +{ + auto arrow_schema = garrow_schema_get_raw(schema); + std::vector<std::shared_ptr<arrow::Array>> arrow_arrays; + for (gsize i = 0; i < n_arrays; ++i) { + arrow_arrays.push_back(garrow_array_get_raw(arrays[i])); + } + + auto arrow_table = arrow::Table::Make(arrow_schema, arrow_arrays); + auto status = arrow_table->Validate(); + if (garrow_error_check(error, status, "[table][new][arrays]")) { + return garrow_table_new_raw(&arrow_table); + } else { + return NULL; + } +} + +/** + * garrow_table_new_record_batches: + * @schema: The schema of the table. + * @record_batches: (array length=n_record_batches): The record batches + * that have data for the table. + * @n_record_batches: The number of record batches. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowTable or %NULL on error. + * + * Since: 0.12.0 + */ +GArrowTable * +garrow_table_new_record_batches(GArrowSchema *schema, + GArrowRecordBatch **record_batches, + gsize n_record_batches, + GError **error) +{ + auto arrow_schema = garrow_schema_get_raw(schema); + std::vector<std::shared_ptr<arrow::RecordBatch>> arrow_record_batches; + for (gsize i = 0; i < n_record_batches; ++i) { + auto arrow_record_batch = garrow_record_batch_get_raw(record_batches[i]); + arrow_record_batches.push_back(arrow_record_batch); + } + + std::shared_ptr<arrow::Table> arrow_table; + auto status = arrow::Table::FromRecordBatches(arrow_schema, + arrow_record_batches, + &arrow_table); + if (garrow_error_check(error, status, "[table][new][record-batches]")) { + return garrow_table_new_raw(&arrow_table); + } else { + return NULL; + } +} + /** * garrow_table_equal: * @table: A #GArrowTable. @@ -313,7 +511,8 @@ garrow_table_replace_column(GArrowTable *table, * @table: A #GArrowTable. * @error: (nullable): Return location for a #GError or %NULL. * - * Returns: (nullable): The formatted table content or %NULL on error. + * Returns: (nullable) (transfer full): + * The formatted table content or %NULL on error. * * The returned string should be freed with g_free() when no * longer needed.
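All of the new garrow_table_new_*() constructors follow the same pattern: build the arrow::Table, Validate() it, and report failures through the GError out parameter. A minimal sketch of the caller side, assuming schema and column were created elsewhere (the helper name is hypothetical):

#include <arrow-glib/arrow-glib.h>

/* Hypothetical helper: wrap a single column into a validated table. */
static GArrowTable *
make_single_column_table(GArrowSchema *schema, GArrowColumn *column)
{
  GError *error = NULL;
  GArrowColumn *columns[] = {column};
  GArrowTable *table =
    garrow_table_new_columns(schema, columns, G_N_ELEMENTS(columns), &error);
  if (!table) {
    /* Validation failures from arrow::Table::Validate() end up here. */
    g_printerr("garrow_table_new_columns: %s\n", error->message);
    g_clear_error(&error);
  }
  return table;
}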
diff --git a/c_glib/arrow-glib/table.h b/c_glib/arrow-glib/table.h index ef7b0f5c289ce..bde2535033c7d 100644 --- a/c_glib/arrow-glib/table.h +++ b/c_glib/arrow-glib/table.h @@ -20,7 +20,9 @@ #pragma once #include +#include #include +#include G_BEGIN_DECLS @@ -35,8 +37,35 @@ struct _GArrowTableClass GObjectClass parent_class; }; -GArrowTable *garrow_table_new (GArrowSchema *schema, - GList *columns); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_table_new_values) +GArrowTable * +garrow_table_new(GArrowSchema *schema, + GList *columns); +#endif +GARROW_AVAILABLE_IN_0_12 +GArrowTable * +garrow_table_new_values(GArrowSchema *schema, + GList *values, + GError **error); +GARROW_AVAILABLE_IN_0_12 +GArrowTable * +garrow_table_new_columns(GArrowSchema *schema, + GArrowColumn **columns, + gsize n_columns, + GError **error); +GARROW_AVAILABLE_IN_0_12 +GArrowTable * +garrow_table_new_arrays(GArrowSchema *schema, + GArrowArray **arrays, + gsize n_arrays, + GError **error); +GARROW_AVAILABLE_IN_0_12 +GArrowTable * +garrow_table_new_record_batches(GArrowSchema *schema, + GArrowRecordBatch **record_batches, + gsize n_record_batches, + GError **error); gboolean garrow_table_equal (GArrowTable *table, GArrowTable *other_table); diff --git a/c_glib/arrow-glib/tensor.cpp b/c_glib/arrow-glib/tensor.cpp index ff2683de4ed09..46ae7beec2675 100644 --- a/c_glib/arrow-glib/tensor.cpp +++ b/c_glib/arrow-glib/tensor.cpp @@ -281,7 +281,9 @@ garrow_tensor_get_buffer(GArrowTensor *tensor) * @tensor: A #GArrowTensor. * @n_dimensions: (out): The number of dimensions. * - * Returns: (array length=n_dimensions): The shape of the tensor. + * Returns: (array length=n_dimensions) (transfer full): + * The shape of the tensor. + * * It should be freed with g_free() when no longer needed. * * Since: 0.3.0 @@ -306,7 +308,9 @@ garrow_tensor_get_shape(GArrowTensor *tensor, gint *n_dimensions) * @tensor: A #GArrowTensor. * @n_strides: (out): The number of strides. * - * Returns: (array length=n_strides): The strides of the tensor. + * Returns: (array length=n_strides) (transfer full): + * The strides of the tensor. + * * It should be freed with g_free() when no longer needed. * * Since: 0.3.0 diff --git a/c_glib/arrow-glib/type.cpp b/c_glib/arrow-glib/type.cpp index 0642004e2f07b..e227ed2c31fc8 100644 --- a/c_glib/arrow-glib/type.cpp +++ b/c_glib/arrow-glib/type.cpp @@ -66,6 +66,8 @@ garrow_type_from_raw(arrow::Type::type type) return GARROW_TYPE_STRING; case arrow::Type::type::BINARY: return GARROW_TYPE_BINARY; + case arrow::Type::type::FIXED_SIZE_BINARY: + return GARROW_TYPE_FIXED_SIZE_BINARY; case arrow::Type::type::DATE32: return GARROW_TYPE_DATE32; case arrow::Type::type::DATE64: diff --git a/c_glib/arrow-glib/type.h b/c_glib/arrow-glib/type.h index 2137c785515f8..85f55c452be55 100644 --- a/c_glib/arrow-glib/type.h +++ b/c_glib/arrow-glib/type.h @@ -40,6 +40,8 @@ G_BEGIN_DECLS * @GARROW_TYPE_DOUBLE: 8-byte floating point value. * @GARROW_TYPE_STRING: UTF-8 variable-length string. * @GARROW_TYPE_BINARY: Variable-length bytes (no guarantee of UTF-8-ness). + * @GARROW_TYPE_FIXED_SIZE_BINARY: Fixed-size binary. Each value occupies + * the same number of bytes. * @GARROW_TYPE_DATE32: int32 days since the UNIX epoch. * @GARROW_TYPE_DATE64: int64 milliseconds since the UNIX epoch. * @GARROW_TYPE_TIMESTAMP: Exact timestamp encoded with int64 since UNIX epoch. 
@@ -72,6 +74,7 @@ typedef enum { GARROW_TYPE_DOUBLE, GARROW_TYPE_STRING, GARROW_TYPE_BINARY, + GARROW_TYPE_FIXED_SIZE_BINARY, GARROW_TYPE_DATE32, GARROW_TYPE_DATE64, GARROW_TYPE_TIMESTAMP, diff --git a/c_glib/arrow-glib/version.h.in b/c_glib/arrow-glib/version.h.in index eb734250e2352..827b9c9a813b8 100644 --- a/c_glib/arrow-glib/version.h.in +++ b/c_glib/arrow-glib/version.h.in @@ -110,6 +110,24 @@ # define GARROW_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor) #endif +/** + * GARROW_VERSION_0_13: + * + * You can use this macro value for compile time API version check. + * + * Since: 0.13.0 + */ +#define GARROW_VERSION_0_13 G_ENCODE_VERSION(0, 13) + +/** + * GARROW_VERSION_0_12: + * + * You can use this macro value for compile time API version check. + * + * Since: 0.12.0 + */ +#define GARROW_VERSION_0_12 G_ENCODE_VERSION(0, 12) + /** * GARROW_VERSION_0_10: * @@ -166,6 +184,34 @@ #define GARROW_AVAILABLE_IN_ALL +#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_13 +# define GARROW_DEPRECATED_IN_0_13 GARROW_DEPRECATED +# define GARROW_DEPRECATED_IN_0_13_FOR(function) GARROW_DEPRECATED_FOR(function) +#else +# define GARROW_DEPRECATED_IN_0_13 +# define GARROW_DEPRECATED_IN_0_13_FOR(function) +#endif + +#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_13 +# define GARROW_AVAILABLE_IN_0_13 GARROW_UNAVAILABLE(0, 13) +#else +# define GARROW_AVAILABLE_IN_0_13 +#endif + +#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_12 +# define GARROW_DEPRECATED_IN_0_12 GARROW_DEPRECATED +# define GARROW_DEPRECATED_IN_0_12_FOR(function) GARROW_DEPRECATED_FOR(function) +#else +# define GARROW_DEPRECATED_IN_0_12 +# define GARROW_DEPRECATED_IN_0_12_FOR(function) +#endif + +#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_12 +# define GARROW_AVAILABLE_IN_0_12 GARROW_UNAVAILABLE(0, 12) +#else +# define GARROW_AVAILABLE_IN_0_12 +#endif + #if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_10 # define GARROW_DEPRECATED_IN_0_10 GARROW_DEPRECATED # define GARROW_DEPRECATED_IN_0_10_FOR(function) GARROW_DEPRECATED_FOR(function) diff --git a/c_glib/arrow-gpu-glib/cuda.cpp b/c_glib/arrow-gpu-glib/cuda.cpp deleted file mode 100644 index 6d2e48f351e95..0000000000000 --- a/c_glib/arrow-gpu-glib/cuda.cpp +++ /dev/null @@ -1,942 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifdef HAVE_CONFIG_H -# include -#endif - -#include -#include -#include -#include -#include -#include -#include - -#include - -G_BEGIN_DECLS - -/** - * SECTION: cuda - * @section_id: cuda-classes - * @title: CUDA related classes - * @include: arrow-gpu-glib/arrow-gpu-glib.h - * - * The following classes provide CUDA support for Apache Arrow data. - * - * #GArrowGPUCUDADeviceManager is the starting point. 
You need at - * least one #GArrowGPUCUDAContext to process Apache Arrow data on - * NVIDIA GPU. - * - * #GArrowGPUCUDAContext is a class to keep context for one GPU. You - * need to create #GArrowGPUCUDAContext for each GPU that you want to - * use. You can create #GArrowGPUCUDAContext by - * garrow_gpu_cuda_device_manager_get_context(). - * - * #GArrowGPUCUDABuffer is a class for data on GPU. You can copy data - * on GPU to/from CPU by garrow_gpu_cuda_buffer_copy_to_host() and - * garrow_gpu_cuda_buffer_copy_from_host(). You can share data on GPU - * with other processes by garrow_gpu_cuda_buffer_export() and - * garrow_gpu_cuda_buffer_new_ipc(). - * - * #GArrowGPUCUDAHostBuffer is a class for data on CPU that is - * directly accessible from GPU. - * - * #GArrowGPUCUDAIPCMemoryHandle is a class to share data on GPU with - * other processes. You can export your data on GPU to other processes - * by garrow_gpu_cuda_buffer_export() and - * garrow_gpu_cuda_ipc_memory_handle_new(). You can import other - * process data on GPU by garrow_gpu_cuda_ipc_memory_handle_new() and - * garrow_gpu_cuda_buffer_new_ipc(). - * - * #GArrowGPUCUDABufferInputStream is a class to read data in - * #GArrowGPUCUDABuffer. - * - * #GArrowGPUCUDABufferOutputStream is a class to write data into - * #GArrowGPUCUDABuffer. - */ - -G_DEFINE_TYPE(GArrowGPUCUDADeviceManager, - garrow_gpu_cuda_device_manager, - G_TYPE_OBJECT) - -static void -garrow_gpu_cuda_device_manager_init(GArrowGPUCUDADeviceManager *object) -{ -} - -static void -garrow_gpu_cuda_device_manager_class_init(GArrowGPUCUDADeviceManagerClass *klass) -{ -} - -/** - * garrow_gpu_cuda_device_manager_new: - * @error: (nullable): Return location for a #GError or %NULL. - * - * Returns: A newly created #GArrowGPUCUDADeviceManager on success, - * %NULL on error. - * - * Since: 0.8.0 - */ -GArrowGPUCUDADeviceManager * -garrow_gpu_cuda_device_manager_new(GError **error) -{ - arrow::gpu::CudaDeviceManager *manager; - auto status = arrow::gpu::CudaDeviceManager::GetInstance(&manager); - if (garrow_error_check(error, status, "[gpu][cuda][device-manager][new]")) { - auto manager = g_object_new(GARROW_GPU_TYPE_CUDA_DEVICE_MANAGER, - NULL); - return GARROW_GPU_CUDA_DEVICE_MANAGER(manager); - } else { - return NULL; - } -} - -/** - * garrow_gpu_cuda_device_manager_get_context: - * @manager: A #GArrowGPUCUDADeviceManager. - * @gpu_number: A GPU device number for the target context. - * @error: (nullable): Return location for a #GError or %NULL. - * - * Returns: (transfer full): A newly created #GArrowGPUCUDAContext on - * success, %NULL on error. Contexts for the same GPU device number - * share the same data internally. - * - * Since: 0.8.0 - */ -GArrowGPUCUDAContext * -garrow_gpu_cuda_device_manager_get_context(GArrowGPUCUDADeviceManager *manager, - gint gpu_number, - GError **error) -{ - arrow::gpu::CudaDeviceManager *arrow_manager; - arrow::gpu::CudaDeviceManager::GetInstance(&arrow_manager); - std::shared_ptr<arrow::gpu::CudaContext> context; - auto status = arrow_manager->GetContext(gpu_number, &context); - if (garrow_error_check(error, status, - "[gpu][cuda][device-manager][get-context]]")) { - return garrow_gpu_cuda_context_new_raw(&context); - } else { - return NULL; - } -} - -/** - * garrow_gpu_cuda_device_manager_get_n_devices: - * @manager: A #GArrowGPUCUDADeviceManager. - * - * Returns: The number of GPU devices.
- * - * Since: 0.8.0 - */ -gsize -garrow_gpu_cuda_device_manager_get_n_devices(GArrowGPUCUDADeviceManager *manager) -{ - arrow::gpu::CudaDeviceManager *arrow_manager; - arrow::gpu::CudaDeviceManager::GetInstance(&arrow_manager); - return arrow_manager->num_devices(); -} - - -typedef struct GArrowGPUCUDAContextPrivate_ { - std::shared_ptr<arrow::gpu::CudaContext> context; -} GArrowGPUCUDAContextPrivate; - -enum { - PROP_CONTEXT = 1 -}; - -G_DEFINE_TYPE_WITH_PRIVATE(GArrowGPUCUDAContext, - garrow_gpu_cuda_context, - G_TYPE_OBJECT) - -#define GARROW_GPU_CUDA_CONTEXT_GET_PRIVATE(object) \ - static_cast<GArrowGPUCUDAContextPrivate *>( \ - garrow_gpu_cuda_context_get_instance_private( \ - GARROW_GPU_CUDA_CONTEXT(object))) - -static void -garrow_gpu_cuda_context_finalize(GObject *object) -{ - auto priv = GARROW_GPU_CUDA_CONTEXT_GET_PRIVATE(object); - - priv->context = nullptr; - - G_OBJECT_CLASS(garrow_gpu_cuda_context_parent_class)->finalize(object); -} - -static void -garrow_gpu_cuda_context_set_property(GObject *object, - guint prop_id, - const GValue *value, - GParamSpec *pspec) -{ - auto priv = GARROW_GPU_CUDA_CONTEXT_GET_PRIVATE(object); - - switch (prop_id) { - case PROP_CONTEXT: - priv->context = - *static_cast<std::shared_ptr<arrow::gpu::CudaContext> *>(g_value_get_pointer(value)); - break; - default: - G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); - break; - } -} - -static void -garrow_gpu_cuda_context_get_property(GObject *object, - guint prop_id, - GValue *value, - GParamSpec *pspec) -{ - switch (prop_id) { - default: - G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); - break; - } -} - -static void -garrow_gpu_cuda_context_init(GArrowGPUCUDAContext *object) -{ -} - -static void -garrow_gpu_cuda_context_class_init(GArrowGPUCUDAContextClass *klass) -{ - GParamSpec *spec; - - auto gobject_class = G_OBJECT_CLASS(klass); - - gobject_class->finalize = garrow_gpu_cuda_context_finalize; - gobject_class->set_property = garrow_gpu_cuda_context_set_property; - gobject_class->get_property = garrow_gpu_cuda_context_get_property; - - /** - * GArrowGPUCUDAContext:context: - * - * Since: 0.8.0 - */ - spec = g_param_spec_pointer("context", - "Context", - "The raw std::shared_ptr<arrow::gpu::CudaContext>", - static_cast<GParamFlags>(G_PARAM_WRITABLE | - G_PARAM_CONSTRUCT_ONLY)); - g_object_class_install_property(gobject_class, PROP_CONTEXT, spec); -} - -/** - * garrow_gpu_cuda_context_get_allocated_size: - * @context: A #GArrowGPUCUDAContext. - * - * Returns: The allocated memory by this context in bytes. - * - * Since: 0.8.0 - */ -gint64 -garrow_gpu_cuda_context_get_allocated_size(GArrowGPUCUDAContext *context) -{ - auto arrow_context = garrow_gpu_cuda_context_get_raw(context); - return arrow_context->bytes_allocated(); -} - - -G_DEFINE_TYPE(GArrowGPUCUDABuffer, - garrow_gpu_cuda_buffer, - GARROW_TYPE_BUFFER) - -static void -garrow_gpu_cuda_buffer_init(GArrowGPUCUDABuffer *object) -{ -} - -static void -garrow_gpu_cuda_buffer_class_init(GArrowGPUCUDABufferClass *klass) -{ -} - -/** - * garrow_gpu_cuda_buffer_new: - * @context: A #GArrowGPUCUDAContext. - * @size: The number of bytes to be allocated on GPU device for this context. - * @error: (nullable): Return location for a #GError or %NULL. - * - * Returns: (transfer full): A newly created #GArrowGPUCUDABuffer on - * success, %NULL on error.
- * - * Since: 0.8.0 - */ -GArrowGPUCUDABuffer * -garrow_gpu_cuda_buffer_new(GArrowGPUCUDAContext *context, - gint64 size, - GError **error) -{ - auto arrow_context = garrow_gpu_cuda_context_get_raw(context); - std::shared_ptr arrow_buffer; - auto status = arrow_context->Allocate(size, &arrow_buffer); - if (garrow_error_check(error, status, "[gpu][cuda][buffer][new]")) { - return garrow_gpu_cuda_buffer_new_raw(&arrow_buffer); - } else { - return NULL; - } -} - -/** - * garrow_gpu_cuda_buffer_new_ipc: - * @context: A #GArrowGPUCUDAContext. - * @handle: A #GArrowGPUCUDAIPCMemoryHandle to be communicated. - * @error: (nullable): Return location for a #GError or %NULL. - * - * Returns: (transfer full): A newly created #GArrowGPUCUDABuffer on - * success, %NULL on error. The buffer has data from the IPC target. - * - * Since: 0.8.0 - */ -GArrowGPUCUDABuffer * -garrow_gpu_cuda_buffer_new_ipc(GArrowGPUCUDAContext *context, - GArrowGPUCUDAIPCMemoryHandle *handle, - GError **error) -{ - auto arrow_context = garrow_gpu_cuda_context_get_raw(context); - auto arrow_handle = garrow_gpu_cuda_ipc_memory_handle_get_raw(handle); - std::shared_ptr arrow_buffer; - auto status = arrow_context->OpenIpcBuffer(*arrow_handle, &arrow_buffer); - if (garrow_error_check(error, status, - "[gpu][cuda][buffer][new-ipc]")) { - return garrow_gpu_cuda_buffer_new_raw(&arrow_buffer); - } else { - return NULL; - } -} - -/** - * garrow_gpu_cuda_buffer_new_record_batch: - * @context: A #GArrowGPUCUDAContext. - * @record_batch: A #GArrowRecordBatch to be serialized. - * @error: (nullable): Return location for a #GError or %NULL. - * - * Returns: (transfer full): A newly created #GArrowGPUCUDABuffer on - * success, %NULL on error. The buffer has serialized record batch - * data. - * - * Since: 0.8.0 - */ -GArrowGPUCUDABuffer * -garrow_gpu_cuda_buffer_new_record_batch(GArrowGPUCUDAContext *context, - GArrowRecordBatch *record_batch, - GError **error) -{ - auto arrow_context = garrow_gpu_cuda_context_get_raw(context); - auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); - std::shared_ptr arrow_buffer; - auto status = arrow::gpu::SerializeRecordBatch(*arrow_record_batch, - arrow_context.get(), - &arrow_buffer); - if (garrow_error_check(error, status, - "[gpu][cuda][buffer][new-record-batch]")) { - return garrow_gpu_cuda_buffer_new_raw(&arrow_buffer); - } else { - return NULL; - } -} - -/** - * garrow_gpu_cuda_buffer_copy_to_host: - * @buffer: A #GArrowGPUCUDABuffer. - * @position: The offset of memory on GPU device to be copied. - * @size: The size of memory on GPU device to be copied in bytes. - * @error: (nullable): Return location for a #GError or %NULL. - * - * Returns: (transfer full): A #GBytes that have copied memory on CPU - * host on success, %NULL on error. - * - * Since: 0.8.0 - */ -GBytes * -garrow_gpu_cuda_buffer_copy_to_host(GArrowGPUCUDABuffer *buffer, - gint64 position, - gint64 size, - GError **error) -{ - auto arrow_buffer = garrow_gpu_cuda_buffer_get_raw(buffer); - auto data = static_cast(g_malloc(size)); - auto status = arrow_buffer->CopyToHost(position, size, data); - if (garrow_error_check(error, status, "[gpu][cuda][buffer][copy-to-host]")) { - return g_bytes_new_take(data, size); - } else { - g_free(data); - return NULL; - } -} - -/** - * garrow_gpu_cuda_buffer_copy_from_host: - * @buffer: A #GArrowGPUCUDABuffer. - * @data: (array length=size): Data on CPU host to be copied. - * @size: The size of data on CPU host to be copied in bytes. 
- * @error: (nullable): Return location for a #GError or %NULL. - * - * Returns: %TRUE on success, %FALSE if there was an error. - * - * Since: 0.8.0 - */ -gboolean -garrow_gpu_cuda_buffer_copy_from_host(GArrowGPUCUDABuffer *buffer, - const guint8 *data, - gint64 size, - GError **error) -{ - auto arrow_buffer = garrow_gpu_cuda_buffer_get_raw(buffer); - auto status = arrow_buffer->CopyFromHost(0, data, size); - return garrow_error_check(error, - status, - "[gpu][cuda][buffer][copy-from-host]"); -} - -/** - * garrow_gpu_cuda_buffer_export: - * @buffer: A #GArrowGPUCUDABuffer. - * @error: (nullable): Return location for a #GError or %NULL. - * - * Returns: (transfer full): A newly created - * #GArrowGPUCUDAIPCMemoryHandle to handle the exported buffer on - * success, %NULL on error - * - * Since: 0.8.0 - */ -GArrowGPUCUDAIPCMemoryHandle * -garrow_gpu_cuda_buffer_export(GArrowGPUCUDABuffer *buffer, GError **error) -{ - auto arrow_buffer = garrow_gpu_cuda_buffer_get_raw(buffer); - std::shared_ptr arrow_handle; - auto status = arrow_buffer->ExportForIpc(&arrow_handle); - if (garrow_error_check(error, status, "[gpu][cuda][buffer][export-for-ipc]")) { - return garrow_gpu_cuda_ipc_memory_handle_new_raw(&arrow_handle); - } else { - return NULL; - } -} - -/** - * garrow_gpu_cuda_buffer_get_context: - * @buffer: A #GArrowGPUCUDABuffer. - * - * Returns: (transfer full): A newly created #GArrowGPUCUDAContext for the - * buffer. Contexts for the same buffer share the same data internally. - * - * Since: 0.8.0 - */ -GArrowGPUCUDAContext * -garrow_gpu_cuda_buffer_get_context(GArrowGPUCUDABuffer *buffer) -{ - auto arrow_buffer = garrow_gpu_cuda_buffer_get_raw(buffer); - auto arrow_context = arrow_buffer->context(); - return garrow_gpu_cuda_context_new_raw(&arrow_context); -} - -/** - * garrow_gpu_cuda_buffer_read_record_batch: - * @buffer: A #GArrowGPUCUDABuffer. - * @schema: A #GArrowSchema for record batch. - * @error: (nullable): Return location for a #GError or %NULL. - * - * Returns: (transfer full): A newly created #GArrowRecordBatch on - * success, %NULL on error. The record batch data is located on GPU. - * - * Since: 0.8.0 - */ -GArrowRecordBatch * -garrow_gpu_cuda_buffer_read_record_batch(GArrowGPUCUDABuffer *buffer, - GArrowSchema *schema, - GError **error) -{ - auto arrow_buffer = garrow_gpu_cuda_buffer_get_raw(buffer); - auto arrow_schema = garrow_schema_get_raw(schema); - auto pool = arrow::default_memory_pool(); - std::shared_ptr arrow_record_batch; - auto status = arrow::gpu::ReadRecordBatch(arrow_schema, - arrow_buffer, - pool, - &arrow_record_batch); - if (garrow_error_check(error, status, - "[gpu][cuda][buffer][read-record-batch]")) { - return garrow_record_batch_new_raw(&arrow_record_batch); - } else { - return NULL; - } -} - - -G_DEFINE_TYPE(GArrowGPUCUDAHostBuffer, - garrow_gpu_cuda_host_buffer, - GARROW_TYPE_MUTABLE_BUFFER) - -static void -garrow_gpu_cuda_host_buffer_init(GArrowGPUCUDAHostBuffer *object) -{ -} - -static void -garrow_gpu_cuda_host_buffer_class_init(GArrowGPUCUDAHostBufferClass *klass) -{ -} - -/** - * garrow_gpu_cuda_host_buffer_new: - * @gpu_number: A GPU device number for the target context. - * @size: The number of bytes to be allocated on CPU host. - * @error: (nullable): Return location for a #GError or %NULL. - * - * Returns: A newly created #GArrowGPUCUDAHostBuffer on success, - * %NULL on error. The allocated memory is accessible from GPU - * device for the @context. 
- * - * Since: 0.8.0 - */ -GArrowGPUCUDAHostBuffer * -garrow_gpu_cuda_host_buffer_new(gint gpu_number, gint64 size, GError **error) -{ - arrow::gpu::CudaDeviceManager *manager; - auto status = arrow::gpu::CudaDeviceManager::GetInstance(&manager); - std::shared_ptr arrow_buffer; - status = manager->AllocateHost(gpu_number, size, &arrow_buffer); - if (garrow_error_check(error, status, "[gpu][cuda][host-buffer][new]")) { - return garrow_gpu_cuda_host_buffer_new_raw(&arrow_buffer); - } else { - return NULL; - } -} - - -typedef struct GArrowGPUCUDAIPCMemoryHandlePrivate_ { - std::shared_ptr ipc_memory_handle; -} GArrowGPUCUDAIPCMemoryHandlePrivate; - -enum { - PROP_IPC_MEMORY_HANDLE = 1 -}; - -G_DEFINE_TYPE_WITH_PRIVATE(GArrowGPUCUDAIPCMemoryHandle, - garrow_gpu_cuda_ipc_memory_handle, - G_TYPE_OBJECT) - -#define GARROW_GPU_CUDA_IPC_MEMORY_HANDLE_GET_PRIVATE(object) \ - static_cast( \ - garrow_gpu_cuda_ipc_memory_handle_get_instance_private( \ - GARROW_GPU_CUDA_IPC_MEMORY_HANDLE(object))) - -static void -garrow_gpu_cuda_ipc_memory_handle_finalize(GObject *object) -{ - auto priv = GARROW_GPU_CUDA_IPC_MEMORY_HANDLE_GET_PRIVATE(object); - - priv->ipc_memory_handle = nullptr; - - G_OBJECT_CLASS(garrow_gpu_cuda_ipc_memory_handle_parent_class)->finalize(object); -} - -static void -garrow_gpu_cuda_ipc_memory_handle_set_property(GObject *object, - guint prop_id, - const GValue *value, - GParamSpec *pspec) -{ - auto priv = GARROW_GPU_CUDA_IPC_MEMORY_HANDLE_GET_PRIVATE(object); - - switch (prop_id) { - case PROP_IPC_MEMORY_HANDLE: - priv->ipc_memory_handle = - *static_cast *>(g_value_get_pointer(value)); - break; - default: - G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); - break; - } -} - -static void -garrow_gpu_cuda_ipc_memory_handle_get_property(GObject *object, - guint prop_id, - GValue *value, - GParamSpec *pspec) -{ - switch (prop_id) { - default: - G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); - break; - } -} - -static void -garrow_gpu_cuda_ipc_memory_handle_init(GArrowGPUCUDAIPCMemoryHandle *object) -{ -} - -static void -garrow_gpu_cuda_ipc_memory_handle_class_init(GArrowGPUCUDAIPCMemoryHandleClass *klass) -{ - GParamSpec *spec; - - auto gobject_class = G_OBJECT_CLASS(klass); - - gobject_class->finalize = garrow_gpu_cuda_ipc_memory_handle_finalize; - gobject_class->set_property = garrow_gpu_cuda_ipc_memory_handle_set_property; - gobject_class->get_property = garrow_gpu_cuda_ipc_memory_handle_get_property; - - /** - * GArrowGPUCUDAIPCMemoryHandle:ipc-memory-handle: - * - * Since: 0.8.0 - */ - spec = g_param_spec_pointer("ipc-memory-handle", - "IPC Memory Handle", - "The raw std::shared_ptr", - static_cast(G_PARAM_WRITABLE | - G_PARAM_CONSTRUCT_ONLY)); - g_object_class_install_property(gobject_class, PROP_IPC_MEMORY_HANDLE, spec); -} - -/** - * garrow_gpu_cuda_ipc_memory_handle_new: - * @data: (array length=size): A serialized #GArrowGPUCUDAIPCMemoryHandle. - * @size: The size of data. - * @error: (nullable): Return location for a #GError or %NULL. - * - * Returns: (transfer full): A newly created #GArrowGPUCUDAIPCMemoryHandle - * on success, %NULL on error. 
- * - * Since: 0.8.0 - */ -GArrowGPUCUDAIPCMemoryHandle * -garrow_gpu_cuda_ipc_memory_handle_new(const guint8 *data, - gsize size, - GError **error) -{ - std::shared_ptr arrow_handle; - auto status = arrow::gpu::CudaIpcMemHandle::FromBuffer(data, &arrow_handle); - if (garrow_error_check(error, status, - "[gpu][cuda][ipc-memory-handle][new]")) { - return garrow_gpu_cuda_ipc_memory_handle_new_raw(&arrow_handle); - } else { - return NULL; - } -} - -/** - * garrow_gpu_cuda_ipc_memory_handle_serialize: - * @handle: A #GArrowGPUCUDAIPCMemoryHandle. - * @error: (nullable): Return location for a #GError or %NULL. - * - * Returns: (transfer full): A newly created #GArrowBuffer on success, - * %NULL on error. The buffer has serialized @handle. The serialized - * @handle can be deserialized by garrow_gpu_cuda_ipc_memory_handle_new() - * in other process. - * - * Since: 0.8.0 - */ -GArrowBuffer * -garrow_gpu_cuda_ipc_memory_handle_serialize(GArrowGPUCUDAIPCMemoryHandle *handle, - GError **error) -{ - auto arrow_handle = garrow_gpu_cuda_ipc_memory_handle_get_raw(handle); - std::shared_ptr arrow_buffer; - auto status = arrow_handle->Serialize(arrow::default_memory_pool(), - &arrow_buffer); - if (garrow_error_check(error, status, - "[gpu][cuda][ipc-memory-handle][serialize]")) { - return garrow_buffer_new_raw(&arrow_buffer); - } else { - return NULL; - } -} - -GArrowBuffer * -garrow_gpu_cuda_buffer_input_stream_new_raw_readable_interface(std::shared_ptr *arrow_buffer) -{ - auto buffer = GARROW_BUFFER(g_object_new(GARROW_GPU_TYPE_CUDA_BUFFER, - "buffer", arrow_buffer, - NULL)); - return buffer; -} - -static std::shared_ptr -garrow_gpu_cuda_buffer_input_stream_get_raw_readable_interface(GArrowReadable *readable) -{ - auto input_stream = GARROW_INPUT_STREAM(readable); - auto arrow_input_stream = garrow_input_stream_get_raw(input_stream); - return arrow_input_stream; -} - -static void -garrow_gpu_cuda_buffer_input_stream_readable_interface_init(GArrowReadableInterface *iface) -{ - iface->new_raw = - garrow_gpu_cuda_buffer_input_stream_new_raw_readable_interface; - iface->get_raw = - garrow_gpu_cuda_buffer_input_stream_get_raw_readable_interface; -} - -G_DEFINE_TYPE_WITH_CODE( - GArrowGPUCUDABufferInputStream, - garrow_gpu_cuda_buffer_input_stream, - GARROW_TYPE_BUFFER_INPUT_STREAM, - G_IMPLEMENT_INTERFACE( - GARROW_TYPE_READABLE, - garrow_gpu_cuda_buffer_input_stream_readable_interface_init)) - -static void -garrow_gpu_cuda_buffer_input_stream_init(GArrowGPUCUDABufferInputStream *object) -{ -} - -static void -garrow_gpu_cuda_buffer_input_stream_class_init(GArrowGPUCUDABufferInputStreamClass *klass) -{ -} - -/** - * garrow_gpu_cuda_buffer_input_stream_new: - * @buffer: A #GArrowGPUCUDABuffer. - * - * Returns: (transfer full): A newly created - * #GArrowGPUCUDABufferInputStream. 
- * - * Since: 0.8.0 - */ -GArrowGPUCUDABufferInputStream * -garrow_gpu_cuda_buffer_input_stream_new(GArrowGPUCUDABuffer *buffer) -{ - auto arrow_buffer = garrow_gpu_cuda_buffer_get_raw(buffer); - auto arrow_reader = - std::make_shared(arrow_buffer); - return garrow_gpu_cuda_buffer_input_stream_new_raw(&arrow_reader); -} - - -G_DEFINE_TYPE(GArrowGPUCUDABufferOutputStream, - garrow_gpu_cuda_buffer_output_stream, - GARROW_TYPE_OUTPUT_STREAM) - -static void -garrow_gpu_cuda_buffer_output_stream_init(GArrowGPUCUDABufferOutputStream *object) -{ -} - -static void -garrow_gpu_cuda_buffer_output_stream_class_init(GArrowGPUCUDABufferOutputStreamClass *klass) -{ -} - -/** - * garrow_gpu_cuda_buffer_output_stream_new: - * @buffer: A #GArrowGPUCUDABuffer. - * - * Returns: (transfer full): A newly created - * #GArrowGPUCUDABufferOutputStream. - * - * Since: 0.8.0 - */ -GArrowGPUCUDABufferOutputStream * -garrow_gpu_cuda_buffer_output_stream_new(GArrowGPUCUDABuffer *buffer) -{ - auto arrow_buffer = garrow_gpu_cuda_buffer_get_raw(buffer); - auto arrow_writer = - std::make_shared(arrow_buffer); - return garrow_gpu_cuda_buffer_output_stream_new_raw(&arrow_writer); -} - -/** - * garrow_gpu_cuda_buffer_output_stream_set_buffer_size: - * @stream: A #GArrowGPUCUDABufferOutputStream. - * @size: A size of CPU buffer in bytes. - * @error: (nullable): Return location for a #GError or %NULL. - * - * Returns: %TRUE on success, %FALSE if there was an error. - * - * Sets CPU buffer size. to limit `cudaMemcpy()` calls. If CPU buffer - * size is `0`, buffering is disabled. - * - * The default is `0`. - * - * Since: 0.8.0 - */ -gboolean -garrow_gpu_cuda_buffer_output_stream_set_buffer_size(GArrowGPUCUDABufferOutputStream *stream, - gint64 size, - GError **error) -{ - auto arrow_stream = garrow_gpu_cuda_buffer_output_stream_get_raw(stream); - auto status = arrow_stream->SetBufferSize(size); - return garrow_error_check(error, - status, - "[gpu][cuda][buffer-output-stream][set-buffer-size]"); -} - -/** - * garrow_gpu_cuda_buffer_output_stream_get_buffer_size: - * @stream: A #GArrowGPUCUDABufferOutputStream. - * - * Returns: The CPU buffer size in bytes. - * - * See garrow_gpu_cuda_buffer_output_stream_set_buffer_size() for CPU - * buffer size details. - * - * Since: 0.8.0 - */ -gint64 -garrow_gpu_cuda_buffer_output_stream_get_buffer_size(GArrowGPUCUDABufferOutputStream *stream) -{ - auto arrow_stream = garrow_gpu_cuda_buffer_output_stream_get_raw(stream); - return arrow_stream->buffer_size(); -} - -/** - * garrow_gpu_cuda_buffer_output_stream_get_buffered_size: - * @stream: A #GArrowGPUCUDABufferOutputStream. - * - * Returns: The size of buffered data in bytes. 
- * - * Since: 0.8.0 - */ -gint64 -garrow_gpu_cuda_buffer_output_stream_get_buffered_size(GArrowGPUCUDABufferOutputStream *stream) -{ - auto arrow_stream = garrow_gpu_cuda_buffer_output_stream_get_raw(stream); - return arrow_stream->num_bytes_buffered(); -} - - -G_END_DECLS - -GArrowGPUCUDAContext * -garrow_gpu_cuda_context_new_raw(std::shared_ptr *arrow_context) -{ - return GARROW_GPU_CUDA_CONTEXT(g_object_new(GARROW_GPU_TYPE_CUDA_CONTEXT, - "context", arrow_context, - NULL)); -} - -std::shared_ptr -garrow_gpu_cuda_context_get_raw(GArrowGPUCUDAContext *context) -{ - if (!context) - return nullptr; - - auto priv = GARROW_GPU_CUDA_CONTEXT_GET_PRIVATE(context); - return priv->context; -} - -GArrowGPUCUDAIPCMemoryHandle * -garrow_gpu_cuda_ipc_memory_handle_new_raw(std::shared_ptr *arrow_handle) -{ - auto handle = g_object_new(GARROW_GPU_TYPE_CUDA_IPC_MEMORY_HANDLE, - "ipc-memory-handle", arrow_handle, - NULL); - return GARROW_GPU_CUDA_IPC_MEMORY_HANDLE(handle); -} - -std::shared_ptr -garrow_gpu_cuda_ipc_memory_handle_get_raw(GArrowGPUCUDAIPCMemoryHandle *handle) -{ - if (!handle) - return nullptr; - - auto priv = GARROW_GPU_CUDA_IPC_MEMORY_HANDLE_GET_PRIVATE(handle); - return priv->ipc_memory_handle; -} - -GArrowGPUCUDABuffer * -garrow_gpu_cuda_buffer_new_raw(std::shared_ptr *arrow_buffer) -{ - return GARROW_GPU_CUDA_BUFFER(g_object_new(GARROW_GPU_TYPE_CUDA_BUFFER, - "buffer", arrow_buffer, - NULL)); -} - -std::shared_ptr -garrow_gpu_cuda_buffer_get_raw(GArrowGPUCUDABuffer *buffer) -{ - if (!buffer) - return nullptr; - - auto arrow_buffer = garrow_buffer_get_raw(GARROW_BUFFER(buffer)); - return std::static_pointer_cast(arrow_buffer); -} - -GArrowGPUCUDAHostBuffer * -garrow_gpu_cuda_host_buffer_new_raw(std::shared_ptr *arrow_buffer) -{ - auto buffer = g_object_new(GARROW_GPU_TYPE_CUDA_HOST_BUFFER, - "buffer", arrow_buffer, - NULL); - return GARROW_GPU_CUDA_HOST_BUFFER(buffer); -} - -std::shared_ptr -garrow_gpu_cuda_host_buffer_get_raw(GArrowGPUCUDAHostBuffer *buffer) -{ - if (!buffer) - return nullptr; - - auto arrow_buffer = garrow_buffer_get_raw(GARROW_BUFFER(buffer)); - return std::static_pointer_cast(arrow_buffer); -} - -GArrowGPUCUDABufferInputStream * -garrow_gpu_cuda_buffer_input_stream_new_raw(std::shared_ptr *arrow_reader) -{ - auto input_stream = g_object_new(GARROW_GPU_TYPE_CUDA_BUFFER_INPUT_STREAM, - "input-stream", arrow_reader, - NULL); - return GARROW_GPU_CUDA_BUFFER_INPUT_STREAM(input_stream); -} - -std::shared_ptr -garrow_gpu_cuda_buffer_input_stream_get_raw(GArrowGPUCUDABufferInputStream *input_stream) -{ - if (!input_stream) - return nullptr; - - auto arrow_reader = - garrow_input_stream_get_raw(GARROW_INPUT_STREAM(input_stream)); - return std::static_pointer_cast(arrow_reader); -} - -GArrowGPUCUDABufferOutputStream * -garrow_gpu_cuda_buffer_output_stream_new_raw(std::shared_ptr *arrow_writer) -{ - auto output_stream = g_object_new(GARROW_GPU_TYPE_CUDA_BUFFER_OUTPUT_STREAM, - "output-stream", arrow_writer, - NULL); - return GARROW_GPU_CUDA_BUFFER_OUTPUT_STREAM(output_stream); -} - -std::shared_ptr -garrow_gpu_cuda_buffer_output_stream_get_raw(GArrowGPUCUDABufferOutputStream *output_stream) -{ - if (!output_stream) - return nullptr; - - auto arrow_writer = - garrow_output_stream_get_raw(GARROW_OUTPUT_STREAM(output_stream)); - return std::static_pointer_cast(arrow_writer); -} diff --git a/c_glib/arrow-gpu-glib/cuda.h b/c_glib/arrow-gpu-glib/cuda.h deleted file mode 100644 index f45a46a2def8e..0000000000000 --- a/c_glib/arrow-gpu-glib/cuda.h +++ /dev/null @@ -1,183 +0,0 @@ -/* 
- * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#pragma once - -#include - -G_BEGIN_DECLS - -#define GARROW_GPU_TYPE_CUDA_DEVICE_MANAGER \ - (garrow_gpu_cuda_device_manager_get_type()) -G_DECLARE_DERIVABLE_TYPE(GArrowGPUCUDADeviceManager, - garrow_gpu_cuda_device_manager, - GARROW_GPU, - CUDA_DEVICE_MANAGER, - GObject) -struct _GArrowGPUCUDADeviceManagerClass -{ - GObjectClass parent_class; -}; - -#define GARROW_GPU_TYPE_CUDA_CONTEXT (garrow_gpu_cuda_context_get_type()) -G_DECLARE_DERIVABLE_TYPE(GArrowGPUCUDAContext, - garrow_gpu_cuda_context, - GARROW_GPU, - CUDA_CONTEXT, - GObject) -struct _GArrowGPUCUDAContextClass -{ - GObjectClass parent_class; -}; - -#define GARROW_GPU_TYPE_CUDA_BUFFER (garrow_gpu_cuda_buffer_get_type()) -G_DECLARE_DERIVABLE_TYPE(GArrowGPUCUDABuffer, - garrow_gpu_cuda_buffer, - GARROW_GPU, - CUDA_BUFFER, - GArrowBuffer) -struct _GArrowGPUCUDABufferClass -{ - GArrowBufferClass parent_class; -}; - -#define GARROW_GPU_TYPE_CUDA_HOST_BUFFER (garrow_gpu_cuda_host_buffer_get_type()) -G_DECLARE_DERIVABLE_TYPE(GArrowGPUCUDAHostBuffer, - garrow_gpu_cuda_host_buffer, - GARROW_GPU, - CUDA_HOST_BUFFER, - GArrowMutableBuffer) -struct _GArrowGPUCUDAHostBufferClass -{ - GArrowMutableBufferClass parent_class; -}; - -#define GARROW_GPU_TYPE_CUDA_IPC_MEMORY_HANDLE \ - (garrow_gpu_cuda_ipc_memory_handle_get_type()) -G_DECLARE_DERIVABLE_TYPE(GArrowGPUCUDAIPCMemoryHandle, - garrow_gpu_cuda_ipc_memory_handle, - GARROW_GPU, - CUDA_IPC_MEMORY_HANDLE, - GObject) -struct _GArrowGPUCUDAIPCMemoryHandleClass -{ - GObjectClass parent_class; -}; - -#define GARROW_GPU_TYPE_CUDA_BUFFER_INPUT_STREAM \ - (garrow_gpu_cuda_buffer_input_stream_get_type()) -G_DECLARE_DERIVABLE_TYPE(GArrowGPUCUDABufferInputStream, - garrow_gpu_cuda_buffer_input_stream, - GARROW_GPU, - CUDA_BUFFER_INPUT_STREAM, - GArrowBufferInputStream) -struct _GArrowGPUCUDABufferInputStreamClass -{ - GArrowBufferInputStreamClass parent_class; -}; - -#define GARROW_GPU_TYPE_CUDA_BUFFER_OUTPUT_STREAM \ - (garrow_gpu_cuda_buffer_output_stream_get_type()) -G_DECLARE_DERIVABLE_TYPE(GArrowGPUCUDABufferOutputStream, - garrow_gpu_cuda_buffer_output_stream, - GARROW_GPU, - CUDA_BUFFER_OUTPUT_STREAM, - GArrowOutputStream) -struct _GArrowGPUCUDABufferOutputStreamClass -{ - GArrowOutputStreamClass parent_class; -}; - -GArrowGPUCUDADeviceManager * -garrow_gpu_cuda_device_manager_new(GError **error); - -GArrowGPUCUDAContext * -garrow_gpu_cuda_device_manager_get_context(GArrowGPUCUDADeviceManager *manager, - gint gpu_number, - GError **error); -gsize -garrow_gpu_cuda_device_manager_get_n_devices(GArrowGPUCUDADeviceManager *manager); - -gint64 -garrow_gpu_cuda_context_get_allocated_size(GArrowGPUCUDAContext *context); - - -GArrowGPUCUDABuffer * -garrow_gpu_cuda_buffer_new(GArrowGPUCUDAContext 
*context, - gint64 size, - GError **error); -GArrowGPUCUDABuffer * -garrow_gpu_cuda_buffer_new_ipc(GArrowGPUCUDAContext *context, - GArrowGPUCUDAIPCMemoryHandle *handle, - GError **error); -GArrowGPUCUDABuffer * -garrow_gpu_cuda_buffer_new_record_batch(GArrowGPUCUDAContext *context, - GArrowRecordBatch *record_batch, - GError **error); -GBytes * -garrow_gpu_cuda_buffer_copy_to_host(GArrowGPUCUDABuffer *buffer, - gint64 position, - gint64 size, - GError **error); -gboolean -garrow_gpu_cuda_buffer_copy_from_host(GArrowGPUCUDABuffer *buffer, - const guint8 *data, - gint64 size, - GError **error); -GArrowGPUCUDAIPCMemoryHandle * -garrow_gpu_cuda_buffer_export(GArrowGPUCUDABuffer *buffer, - GError **error); -GArrowGPUCUDAContext * -garrow_gpu_cuda_buffer_get_context(GArrowGPUCUDABuffer *buffer); -GArrowRecordBatch * -garrow_gpu_cuda_buffer_read_record_batch(GArrowGPUCUDABuffer *buffer, - GArrowSchema *schema, - GError **error); - - -GArrowGPUCUDAHostBuffer * -garrow_gpu_cuda_host_buffer_new(gint gpu_number, - gint64 size, - GError **error); - -GArrowGPUCUDAIPCMemoryHandle * -garrow_gpu_cuda_ipc_memory_handle_new(const guint8 *data, - gsize size, - GError **error); - -GArrowBuffer * -garrow_gpu_cuda_ipc_memory_handle_serialize(GArrowGPUCUDAIPCMemoryHandle *handle, - GError **error); - -GArrowGPUCUDABufferInputStream * -garrow_gpu_cuda_buffer_input_stream_new(GArrowGPUCUDABuffer *buffer); - -GArrowGPUCUDABufferOutputStream * -garrow_gpu_cuda_buffer_output_stream_new(GArrowGPUCUDABuffer *buffer); - -gboolean -garrow_gpu_cuda_buffer_output_stream_set_buffer_size(GArrowGPUCUDABufferOutputStream *stream, - gint64 size, - GError **error); -gint64 -garrow_gpu_cuda_buffer_output_stream_get_buffer_size(GArrowGPUCUDABufferOutputStream *stream); -gint64 -garrow_gpu_cuda_buffer_output_stream_get_buffered_size(GArrowGPUCUDABufferOutputStream *stream); - -G_END_DECLS diff --git a/c_glib/arrow-gpu-glib/cuda.hpp b/c_glib/arrow-gpu-glib/cuda.hpp deleted file mode 100644 index 4b5b03c8b4608..0000000000000 --- a/c_glib/arrow-gpu-glib/cuda.hpp +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -#pragma once - -#include - -#include - -GArrowGPUCUDAContext * -garrow_gpu_cuda_context_new_raw(std::shared_ptr *arrow_context); -std::shared_ptr -garrow_gpu_cuda_context_get_raw(GArrowGPUCUDAContext *context); - -GArrowGPUCUDAIPCMemoryHandle * -garrow_gpu_cuda_ipc_memory_handle_new_raw(std::shared_ptr *arrow_handle); -std::shared_ptr -garrow_gpu_cuda_ipc_memory_handle_get_raw(GArrowGPUCUDAIPCMemoryHandle *handle); - -GArrowGPUCUDABuffer * -garrow_gpu_cuda_buffer_new_raw(std::shared_ptr *arrow_buffer); -std::shared_ptr -garrow_gpu_cuda_buffer_get_raw(GArrowGPUCUDABuffer *buffer); - -GArrowGPUCUDAHostBuffer * -garrow_gpu_cuda_host_buffer_new_raw(std::shared_ptr *arrow_buffer); -std::shared_ptr -garrow_gpu_cuda_host_buffer_get_raw(GArrowGPUCUDAHostBuffer *buffer); - -GArrowGPUCUDABufferInputStream * -garrow_gpu_cuda_buffer_input_stream_new_raw(std::shared_ptr *arrow_reader); -std::shared_ptr -garrow_gpu_cuda_buffer_input_stream_get_raw(GArrowGPUCUDABufferInputStream *input_stream); - -GArrowGPUCUDABufferOutputStream * -garrow_gpu_cuda_buffer_output_stream_new_raw(std::shared_ptr *arrow_writer); -std::shared_ptr -garrow_gpu_cuda_buffer_output_stream_get_raw(GArrowGPUCUDABufferOutputStream *output_stream); diff --git a/c_glib/arrow-gpu-glib/meson.build b/c_glib/arrow-gpu-glib/meson.build deleted file mode 100644 index e6b170efc5941..0000000000000 --- a/c_glib/arrow-gpu-glib/meson.build +++ /dev/null @@ -1,75 +0,0 @@ -# -*- indent-tabs-mode: nil -*- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
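[Editor's sketch, not part of the patch: the arrow-gpu-glib API deleted above was driven from C roughly as follows. A minimal sketch assuming a CUDA-capable device at number 0, using only functions declared in the deleted cuda.h and eliding most error handling; after this patch the same flow lives in arrow-cuda-glib, and the renamed garrow_cuda_* identifiers implied by the new package name are an assumption.]

    /* Hypothetical standalone program against the pre-rename API. */
    #include <arrow-gpu-glib/arrow-gpu-glib.h>

    int
    main(void)
    {
      GError *error = NULL;

      /* One device manager per process; one context per GPU. */
      GArrowGPUCUDADeviceManager *manager =
        garrow_gpu_cuda_device_manager_new(&error);
      if (!manager) {
        g_printerr("device manager: %s\n", error->message);
        g_clear_error(&error);
        return 1;
      }
      GArrowGPUCUDAContext *context =
        garrow_gpu_cuda_device_manager_get_context(manager, 0, &error);

      /* Allocate device memory, copy host data in, then copy it back. */
      GArrowGPUCUDABuffer *buffer =
        garrow_gpu_cuda_buffer_new(context, 64, &error);
      const guint8 data[] = {1, 2, 3, 4};
      garrow_gpu_cuda_buffer_copy_from_host(buffer, data, sizeof(data), &error);
      GBytes *bytes =
        garrow_gpu_cuda_buffer_copy_to_host(buffer, 0, sizeof(data), &error);

      g_bytes_unref(bytes);
      g_object_unref(buffer);
      g_object_unref(context);
      g_object_unref(manager);
      return 0;
    }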
-
-sources = files(
-  'cuda.cpp',
-)
-
-c_headers = files(
-  'arrow-gpu-glib.h',
-  'cuda.h',
-)
-
-cpp_headers = files(
-  'arrow-gpu-glib.hpp',
-  'cuda.hpp',
-)
-
-headers = c_headers + cpp_headers
-install_headers(headers, subdir: 'arrow-gpu-glib')
-
-
-dependencies = [
-  arrow_gpu,
-  arrow_glib,
-]
-libarrow_gpu_glib = library('arrow-gpu-glib',
-                            sources: sources,
-                            install: true,
-                            dependencies: dependencies,
-                            include_directories: base_include_directories,
-                            soversion: so_version,
-                            version: library_version)
-arrow_gpu_glib = declare_dependency(link_with: libarrow_gpu_glib,
-                                    include_directories: base_include_directories,
-                                    dependencies: dependencies)
-
-pkgconfig.generate(filebase: 'arrow-gpu-glib',
-                   name: 'Apache Arrow GPU GLib',
-                   description: 'C API for Apache Arrow GPU based on GLib',
-                   version: version,
-                   requires: ['arrow-glib', 'arrow-gpu'],
-                   libraries: [libarrow_gpu_glib])
-
-gnome.generate_gir(libarrow_gpu_glib,
-                   dependencies: declare_dependency(sources: arrow_glib_gir),
-                   sources: sources + c_headers,
-                   namespace: 'ArrowGPU',
-                   nsversion: api_version,
-                   identifier_prefix: 'GArrowGPU',
-                   symbol_prefix: 'garrow_gpu',
-                   export_packages: 'arrow-gpu-glib',
-                   includes: [
-                     'Arrow-1.0',
-                   ],
-                   install: true,
-                   extra_args: [
-                     '--warn-all',
-                     '--include-uninstalled=./arrow-glib/Arrow-1.0.gir',
-                   ])
diff --git a/c_glib/configure.ac b/c_glib/configure.ac
index badf9e98da4ea..75654d2d7be25 100644
--- a/c_glib/configure.ac
+++ b/c_glib/configure.ac
@@ -17,12 +17,7 @@
 
 AC_PREREQ(2.65)
 
-m4_define([arrow_glib_version],
-          m4_esyscmd(grep "^  <version>" "$(dirname $0)/../java/pom.xml" | \
-                       sed -E \
-                         -e 's/(^  <version>)//g' \
-                         -e 's/(<\/version>$)//g' | \
-                       tr -d '\n'))
+m4_define([arrow_glib_version], 0.13.0-SNAPSHOT)
 
 AC_INIT([arrow-glib],
         arrow_glib_version,
         [https://issues.apache.org/jira/browse/ARROW],
@@ -115,6 +110,7 @@ AC_ARG_WITH(arrow-cpp-build-type,
             [GARROW_ARROW_CPP_BUILD_TYPE="$withval"],
             [GARROW_ARROW_CPP_BUILD_TYPE="release"])
 
+ARROW_CUDA_PKG_CONFIG_PATH=""
 AC_ARG_WITH(arrow-cpp-build-dir,
             [AS_HELP_STRING([--with-arrow-cpp-build-dir=PATH],
                             [Use this option to build with not installed Arrow C++])],
@@ -130,10 +126,10 @@ if test "x$GARROW_ARROW_CPP_BUILD_DIR" = "x"; then
                     [arrow-orc],
                     [HAVE_ARROW_ORC=yes],
                     [HAVE_ARROW_ORC=no])
-  PKG_CHECK_MODULES([ARROW_GPU],
-                    [arrow-gpu],
-                    [HAVE_ARROW_GPU=yes],
-                    [HAVE_ARROW_GPU=no])
+  PKG_CHECK_MODULES([ARROW_CUDA],
+                    [arrow-cuda],
+                    [HAVE_ARROW_CUDA=yes],
+                    [HAVE_ARROW_CUDA=no])
   PKG_CHECK_MODULES([GANDIVA],
                     [gandiva],
                     [HAVE_GANDIVA=yes],
@@ -168,16 +164,19 @@ else
     HAVE_ARROW_ORC=no
   fi
 
-  ARROW_GPU_CFLAGS=""
-  if test -f "${GARROW_ARROW_CPP_BUILD_DIR}/src/arrow/gpu/arrow-gpu.pc"; then
-    HAVE_ARROW_GPU=yes
-    ARROW_GPU_LIBS="-larrow_gpu"
+  ARROW_CUDA_CFLAGS=""
+  if test -f "${GARROW_ARROW_CPP_BUILD_DIR}/src/arrow/gpu/arrow-cuda.pc"; then
+    HAVE_ARROW_CUDA=yes
+    ARROW_CUDA_LIBS="-larrow_cuda"
+    ARROW_CUDA_PKG_CONFIG_PATH="\$(ARROW_BUILD_DIR)/src/arrow/gpu"
   else
-    HAVE_ARROW_GPU=no
-    ARROW_GPU_LIBS=""
+    HAVE_ARROW_CUDA=no
+    ARROW_CUDA_LIBS=""
+    ARROW_CUDA_PKG_CONFIG_PATH=""
   fi
-  AC_SUBST(ARROW_GPU_CFLAGS)
-  AC_SUBST(ARROW_GPU_LIBS)
+  AC_SUBST(ARROW_CUDA_CFLAGS)
+  AC_SUBST(ARROW_CUDA_LIBS)
+  AC_SUBST(ARROW_CUDA_PKG_CONFIG_PATH)
 
   GANDIVA_CFLAGS=""
   if test -f "${GARROW_ARROW_CPP_BUILD_DIR}/src/gandiva/gandiva.pc"; then
@@ -221,10 +220,20 @@ if test "$HAVE_ARROW_ORC" = "yes"; then
   AC_DEFINE(HAVE_ARROW_ORC, [1], [Define to 1 if Apache Arrow supports ORC.])
 fi
 
-AM_CONDITIONAL([HAVE_ARROW_GPU], [test "$HAVE_ARROW_GPU" = "yes"])
-if test "$HAVE_ARROW_GPU" = "yes"; then
-  AC_DEFINE(HAVE_ARROW_GPU, [1], [Define to 1 if Apache Arrow supports GPU.])
+AM_CONDITIONAL([HAVE_ARROW_CUDA], [test "$HAVE_ARROW_CUDA" = "yes"])
+if test "$HAVE_ARROW_CUDA" = "yes"; then
+  ARROW_CUDA_GLIB_PACKAGE="arrow-cuda-glib"
+  PLASMA_ARROW_CUDA_PKG_CONFIG_PATH=":\$(abs_top_builddir)/arrow-cuda-glib"
+  if test -n "${ARROW_CUDA_PKG_CONFIG_PATH}"; then
+    PLASMA_ARROW_CUDA_PKG_CONFIG_PATH=":${ARROW_CUDA_PKG_CONFIG_PATH}${PLASMA_ARROW_CUDA_PKG_CONFIG_PATH}"
+  fi
+  AC_DEFINE(HAVE_ARROW_CUDA, [1], [Define to 1 if Apache Arrow supports CUDA.])
+else
+  ARROW_CUDA_GLIB_PACKAGE=""
+  PLASMA_ARROW_CUDA_PKG_CONFIG_PATH=""
 fi
+AC_SUBST(ARROW_CUDA_GLIB_PACKAGE)
+AC_SUBST(PLASMA_ARROW_CUDA_PKG_CONFIG_PATH)
 
 AM_CONDITIONAL([HAVE_GANDIVA], [test "$HAVE_GANDIVA" = "yes"])
 if test "$HAVE_GANDIVA" = "yes"; then
@@ -246,12 +255,12 @@ AC_SUBST(exampledir)
 
 AC_CONFIG_FILES([
   Makefile
+  arrow-cuda-glib/Makefile
+  arrow-cuda-glib/arrow-cuda-glib.pc
   arrow-glib/Makefile
   arrow-glib/arrow-glib.pc
   arrow-glib/arrow-orc-glib.pc
   arrow-glib/version.h
-  arrow-gpu-glib/Makefile
-  arrow-gpu-glib/arrow-gpu-glib.pc
   gandiva-glib/Makefile
   gandiva-glib/gandiva-glib.pc
   parquet-glib/Makefile
@@ -269,7 +278,6 @@ AC_CONFIG_FILES([
   doc/plasma-glib/entities.xml
   example/Makefile
   example/lua/Makefile
-  tool/Makefile
 ])
 
 AC_OUTPUT
diff --git a/c_glib/doc/arrow-glib/Makefile.am b/c_glib/doc/arrow-glib/Makefile.am
index ad0c9382194d9..db9f00f39f300 100644
--- a/c_glib/doc/arrow-glib/Makefile.am
+++ b/c_glib/doc/arrow-glib/Makefile.am
@@ -55,15 +55,15 @@ AM_CFLAGS = \
 GTKDOC_LIBS = \
 	$(top_builddir)/arrow-glib/libarrow-glib.la
 
-if HAVE_ARROW_GPU
+if HAVE_ARROW_CUDA
 DOC_SOURCE_DIR += \
-	$(top_srcdir)/arrow-gpu-glib
+	$(top_srcdir)/arrow-cuda-glib
 HFILE_GLOB += \
-	$(top_srcdir)/arrow-gpu-glib/*.h
+	$(top_srcdir)/arrow-cuda-glib/*.h
 CFILE_GLOB += \
-	$(top_srcdir)/arrow-gpu-glib/*.cpp
+	$(top_srcdir)/arrow-cuda-glib/*.cpp
 GTKDOC_LIBS += \
-	$(top_builddir)/arrow-gpu-glib/libarrow-gpu-glib.la
+	$(top_builddir)/arrow-cuda-glib/libarrow-cuda-glib.la
 endif
 
 include $(top_srcdir)/gtk-doc.make
diff --git a/c_glib/doc/arrow-glib/arrow-glib-docs.xml b/c_glib/doc/arrow-glib/arrow-glib-docs.xml
index 17b75005ff97a..1016703001b8c 100644
--- a/c_glib/doc/arrow-glib/arrow-glib-docs.xml
+++ b/c_glib/doc/arrow-glib/arrow-glib-docs.xml
@@ -53,7 +53,7 @@
     Decimal
-
+
     Tensor
@@ -163,6 +163,10 @@
     Index of deprecated API
+
+    Index of new symbols in 0.13.0
+
+
     Index of new symbols in 0.12.0
diff --git a/c_glib/doc/arrow-glib/meson.build b/c_glib/doc/arrow-glib/meson.build
index 68050aa8792f8..d61a9747de678 100644
--- a/c_glib/doc/arrow-glib/meson.build
+++ b/c_glib/doc/arrow-glib/meson.build
@@ -50,13 +50,13 @@ source_directories = [
 dependencies = [
   arrow_glib,
 ]
-if arrow_gpu.found()
+if arrow_cuda.found()
   source_directories += [
-    join_paths(meson.source_root(), 'arrow-gpu-glib'),
-    join_paths(meson.build_root(), 'arrow-gpu-glib'),
+    join_paths(meson.source_root(), 'arrow-cuda-glib'),
+    join_paths(meson.build_root(), 'arrow-cuda-glib'),
   ]
   dependencies += [
-    arrow_gpu_glib,
+    arrow_cuda_glib,
   ]
 endif
 ignore_headers = []
diff --git a/c_glib/doc/parquet-glib/parquet-glib-docs.xml b/c_glib/doc/parquet-glib/parquet-glib-docs.xml
index 0f2c30ba7863f..4485a6765cb6b 100644
--- a/c_glib/doc/parquet-glib/parquet-glib-docs.xml
+++ b/c_glib/doc/parquet-glib/parquet-glib-docs.xml
@@ -57,6 +57,10 @@
     Index of deprecated API
+
+    Index of new symbols in 0.12.0
+
+
     Index of new symbols in 0.11.0
diff --git a/c_glib/doc/plasma-glib/Makefile.am b/c_glib/doc/plasma-glib/Makefile.am
index 6a25bfb484eba..df872d6ca312c 100644
--- a/c_glib/doc/plasma-glib/Makefile.am
+++ b/c_glib/doc/plasma-glib/Makefile.am
@@ -15,6 +15,12 @@
 # specific language governing permissions and limitations
 # under the License.
 
+PLASMA_ARROW_CUDA_GTKDOC_LIBS =
+if HAVE_ARROW_CUDA
+PLASMA_ARROW_CUDA_GTKDOC_LIBS += \
+	$(top_builddir)/arrow-cuda-glib/libarrow-cuda-glib.la
+endif
+
 if HAVE_PLASMA
 DOC_MODULE = plasma-glib
 
@@ -50,6 +56,7 @@ AM_CFLAGS = \
 GTKDOC_LIBS = \
 	$(top_builddir)/arrow-glib/libarrow-glib.la \
+	$(PLASMA_ARROW_CUDA_GTKDOC_LIBS) \
 	$(top_builddir)/plasma-glib/libplasma-glib.la
 
 include $(top_srcdir)/gtk-doc.make
diff --git a/c_glib/doc/plasma-glib/meson.build b/c_glib/doc/plasma-glib/meson.build
index 2572f0f371cc4..9efc53b4b1b23 100644
--- a/c_glib/doc/plasma-glib/meson.build
+++ b/c_glib/doc/plasma-glib/meson.build
@@ -56,6 +56,9 @@ dependencies = [
   arrow_glib,
   plasma_glib,
 ]
+if arrow_cuda.found()
+  dependencies += [arrow_cuda_glib]
+endif
 ignore_headers = []
 gnome.gtkdoc(project_name,
              main_xml: project_name + '-docs.xml',
diff --git a/c_glib/doc/plasma-glib/plasma-glib-docs.xml b/c_glib/doc/plasma-glib/plasma-glib-docs.xml
index 86e3245043d32..83d3aea9b00f7 100644
--- a/c_glib/doc/plasma-glib/plasma-glib-docs.xml
+++ b/c_glib/doc/plasma-glib/plasma-glib-docs.xml
@@ -36,12 +36,16 @@
 
-
-      PlasmaClient
+
+      Client side
       Client
+
+      Object
+
+
diff --git a/c_glib/example/build.c b/c_glib/example/build.c
index 8c6cf74d74815..9b2d58d2b2bba 100644
--- a/c_glib/example/build.c
+++ b/c_glib/example/build.c
@@ -33,13 +33,13 @@ main(int argc, char **argv)
 
   builder = garrow_int32_array_builder_new();
   if (success) {
-    success = garrow_int32_array_builder_append(builder, 29, &error);
+    success = garrow_int32_array_builder_append_value(builder, 29, &error);
   }
   if (success) {
-    success = garrow_int32_array_builder_append(builder, 2929, &error);
+    success = garrow_int32_array_builder_append_value(builder, 2929, &error);
   }
   if (success) {
-    success = garrow_int32_array_builder_append(builder, 292929, &error);
+    success = garrow_int32_array_builder_append_value(builder, 292929, &error);
   }
   if (!success) {
     g_print("failed to append: %s\n", error->message);
diff --git a/c_glib/example/lua/Makefile.am b/c_glib/example/lua/Makefile.am
index 86bdbed8a0228..9019d24741c1a 100644
--- a/c_glib/example/lua/Makefile.am
+++ b/c_glib/example/lua/Makefile.am
@@ -20,6 +20,5 @@ dist_lua_example_DATA =	\
 	README.md		\
 	read-batch.lua		\
 	read-stream.lua		\
-	stream-to-torch-tensor.lua \
 	write-batch.lua		\
 	write-stream.lua
diff --git a/c_glib/example/lua/README.md b/c_glib/example/lua/README.md
index e7e3351fef148..7d388d46acb33 100644
--- a/c_glib/example/lua/README.md
+++ b/c_glib/example/lua/README.md
@@ -48,8 +48,3 @@ Here are example codes in this directory:
 
   * `read-stream.lua`: It shows how to read Arrow array from file in
     stream mode.
-
-  * `stream-to-torch-tensor.lua`: It shows how to read Arrow array
-    from file in stream mode and convert it to
-    [Torch](http://torch.ch/)'s
-    [`Tensor` object](http://torch7.readthedocs.io/en/rtd/tensor/index.html).
diff --git a/c_glib/example/lua/stream-to-torch-tensor.lua b/c_glib/example/lua/stream-to-torch-tensor.lua
deleted file mode 100644
index fc765e3c96872..0000000000000
--- a/c_glib/example/lua/stream-to-torch-tensor.lua
+++ /dev/null
@@ -1,101 +0,0 @@
--- Licensed to the Apache Software Foundation (ASF) under one
--- or more contributor license agreements.  See the NOTICE file
--- distributed with this work for additional information
--- regarding copyright ownership.  The ASF licenses this file
--- to you under the Apache License, Version 2.0 (the
--- "License"); you may not use this file except in compliance
--- with the License.  You may obtain a copy of the License at
--- 
---   http://www.apache.org/licenses/LICENSE-2.0
--- 
--- Unless required by applicable law or agreed to in writing,
--- software distributed under the License is distributed on an
--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
--- KIND, either express or implied.  See the License for the
--- specific language governing permissions and limitations
--- under the License.
-
-local lgi = require 'lgi'
-local Arrow = lgi.Arrow
-
-local torch = require 'torch'
-
-Arrow.Array.torch_types = function(self)
-   return nil
-end
-
-Arrow.Array.to_torch = function(self)
-   local types = self:torch_types()
-   if not types then
-      return nil
-   end
-
-   local storage_type = types[1]
-   local tensor_type = types[2]
-
-   local size = self:get_length()
-   local storage = storage_type(size)
-   if not storage then
-      return nil
-   end
-
-   for i = 1, size do
-      storage[i] = self:get_value(i - 1)
-   end
-   return tensor_type(storage)
-end
-
-Arrow.UInt8Array.torch_types = function(self)
-   return {torch.ByteStorage, torch.ByteTensor}
-end
-
-Arrow.Int8Array.torch_types = function(self)
-   return {torch.CharStorage, torch.CharTensor}
-end
-
-Arrow.Int16Array.torch_types = function(self)
-   return {torch.ShortStorage, torch.ShortTensor}
-end
-
-Arrow.Int32Array.torch_types = function(self)
-   return {torch.IntStorage, torch.IntTensor}
-end
-
-Arrow.Int64Array.torch_types = function(self)
-   return {torch.LongStorage, torch.LongTensor}
-end
-
-Arrow.FloatArray.torch_types = function(self)
-   return {torch.FloatStorage, torch.FloatTensor}
-end
-
-Arrow.DoubleArray.torch_types = function(self)
-   return {torch.DoubleStorage, torch.DoubleTensor}
-end
-
-
-local input_path = arg[1] or "/tmp/stream.arrow";
-
-local input = Arrow.MemoryMappedInputStream.new(input_path)
-local reader = Arrow.RecordBatchStreamReader.new(input)
-
-local i = 0
-while true do
-   local record_batch = reader:read_next_record_batch()
-   if not record_batch then
-      break
-   end
-
-   print(string.rep("=", 40))
-   print("record-batch["..i.."]:")
-   for j = 0, record_batch:get_n_columns() - 1 do
-      local column = record_batch:get_column(j)
-      local column_name = record_batch:get_column_name(j)
-      print("  "..column_name..":")
-      print(column:to_torch())
-   end
-
-   i = i + 1
-end
-
-input:close()
diff --git a/c_glib/gandiva-glib/expression.cpp b/c_glib/gandiva-glib/expression.cpp
index 529d85164de2a..b4e7a96bdef10 100644
--- a/c_glib/gandiva-glib/expression.cpp
+++ b/c_glib/gandiva-glib/expression.cpp
@@ -201,7 +201,8 @@ ggandiva_expression_new(GGandivaNode *root_node,
  * ggandiva_expression_to_string:
  * @expression: A #GGandivaExpression.
  *
- * Returns: The string representation of the node in the expression tree.
+ * Returns: (transfer full): The string representation of the node in the expression tree.
+ *
  *   It should be freed with g_free() when no longer needed.
* * Since: 0.12.0 diff --git a/c_glib/gandiva-glib/node.cpp b/c_glib/gandiva-glib/node.cpp index 49d1d0b7168df..b2adf8560f246 100644 --- a/c_glib/gandiva-glib/node.cpp +++ b/c_glib/gandiva-glib/node.cpp @@ -22,10 +22,20 @@ #endif #include +#include #include #include +template +Type +ggandiva_literal_node_get(GGandivaLiteralNode *node) +{ + auto gandiva_literal_node = + std::static_pointer_cast(ggandiva_node_get_raw(GGANDIVA_NODE(node))); + return gandiva_literal_node->holder().get(); +} + G_BEGIN_DECLS /** @@ -40,15 +50,64 @@ G_BEGIN_DECLS * * #GGandivaFunctionNode is a class for a node in the expression tree, representing a function. * + * #GGandivaLiteralNode is a base class for a node in the expression tree, + * representing a literal. + * + * #GGandivaNullLiteralNode is a class for a node in the expression tree, + * representing a null literal. + * + * #GGandivaBooleanLiteralNode is a class for a node in the expression tree, + * representing a boolean literal. + * + * #GGandivaInt8LiteralNode is a class for a node in the expression tree, + * representing a 8-bit integer literal. + * + * #GGandivaUInt8LiteralNode is a class for a node in the expression tree, + * representing a 8-bit unsigned integer literal. + * + * #GGandivaInt16LiteralNode is a class for a node in the expression tree, + * representing a 16-bit integer literal. + * + * #GGandivaUInt16LiteralNode is a class for a node in the expression tree, + * representing a 16-bit unsigned integer literal. + * + * #GGandivaInt32LiteralNode is a class for a node in the expression tree, + * representing a 32-bit integer literal. + * + * #GGandivaUInt32LiteralNode is a class for a node in the expression tree, + * representing a 32-bit unsigned integer literal. + * + * #GGandivaInt64LiteralNode is a class for a node in the expression tree, + * representing a 64-bit integer literal. + * + * #GGandivaUInt64LiteralNode is a class for a node in the expression tree, + * representing a 64-bit unsigned integer literal. + * + * #GGandivaFloatLiteralNode is a class for a node in the expression tree, + * representing a 32-bit floating point literal. + * + * #GGandivaDoubleLiteralNode is a class for a node in the expression tree, + * representing a 64-bit floating point literal. + * + * #GGandivaBinaryLiteralNode is a class for a node in the expression tree, + * representing a binary literal. + * + * #GGandivaStringLiteralNode is a class for a node in the expression tree, + * representing an UTF-8 encoded string literal. + * + * #GGandivaIfNode is a class for a node in the expression tree, representing an if-else. 
+ * * Since: 0.12.0 */ typedef struct GGandivaNodePrivate_ { std::shared_ptr node; + GArrowDataType *return_type; } GGandivaNodePrivate; enum { - PROP_NODE = 1 + PROP_NODE = 1, + PROP_RETURN_TYPE }; G_DEFINE_TYPE_WITH_PRIVATE(GGandivaNode, @@ -60,6 +119,19 @@ G_DEFINE_TYPE_WITH_PRIVATE(GGandivaNode, ggandiva_node_get_instance_private( \ GGANDIVA_NODE(object))) +static void +ggandiva_node_dispose(GObject *object) +{ + auto priv = GGANDIVA_NODE_GET_PRIVATE(object); + + if (priv->return_type) { + g_object_unref(priv->return_type); + priv->return_type = nullptr; + } + + G_OBJECT_CLASS(ggandiva_node_parent_class)->dispose(object); +} + static void ggandiva_node_finalize(GObject *object) { @@ -83,6 +155,27 @@ ggandiva_node_set_property(GObject *object, priv->node = *static_cast *>(g_value_get_pointer(value)); break; + case PROP_RETURN_TYPE: + priv->return_type = GARROW_DATA_TYPE(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +ggandiva_node_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GGANDIVA_NODE_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_RETURN_TYPE: + g_value_set_object(value, priv->return_type); + break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); break; @@ -97,19 +190,28 @@ ggandiva_node_init(GGandivaNode *object) static void ggandiva_node_class_init(GGandivaNodeClass *klass) { - GParamSpec *spec; - auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->dispose = ggandiva_node_dispose; gobject_class->finalize = ggandiva_node_finalize; gobject_class->set_property = ggandiva_node_set_property; + gobject_class->get_property = ggandiva_node_get_property; + GParamSpec *spec; spec = g_param_spec_pointer("node", "Node", "The raw std::shared *", static_cast(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); g_object_class_install_property(gobject_class, PROP_NODE, spec); + + spec = g_param_spec_object("return-type", + "Return type", + "The return type", + GARROW_TYPE_DATA_TYPE, + static_cast(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_RETURN_TYPE, spec); } @@ -223,12 +325,10 @@ ggandiva_field_node_new(GArrowField *field) typedef struct GGandivaFunctionNodePrivate_ { gchar *name; GList *parameters; - GArrowDataType *return_type; } GGandivaFunctionNodePrivate; enum { - PROP_NAME = 1, - PROP_RETURN_TYPE + PROP_NAME = 1 }; G_DEFINE_TYPE_WITH_PRIVATE(GGandivaFunctionNode, @@ -254,11 +354,6 @@ ggandiva_function_node_dispose(GObject *object) priv->parameters = nullptr; } - if (priv->return_type) { - g_object_unref(priv->return_type); - priv->return_type = nullptr; - } - G_OBJECT_CLASS(ggandiva_function_node_parent_class)->dispose(object); } @@ -284,9 +379,6 @@ ggandiva_function_node_set_property(GObject *object, case PROP_NAME: priv->name = g_value_dup_string(value); break; - case PROP_RETURN_TYPE: - priv->return_type = GARROW_DATA_TYPE(g_value_dup_object(value)); - break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); break; @@ -305,9 +397,6 @@ ggandiva_function_node_get_property(GObject *object, case PROP_NAME: g_value_set_string(value, priv->name); break; - case PROP_RETURN_TYPE: - g_value_set_object(value, priv->return_type); - break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); break; @@ -339,14 +428,6 @@ ggandiva_function_node_class_init(GGandivaFunctionNodeClass *klass) static_cast(G_PARAM_READWRITE | 
G_PARAM_CONSTRUCT_ONLY)); g_object_class_install_property(gobject_class, PROP_NAME, spec); - - spec = g_param_spec_object("return-type", - "Return type", - "The return type of the function", - GARROW_TYPE_DATA_TYPE, - static_cast(G_PARAM_READWRITE | - G_PARAM_CONSTRUCT_ONLY)); - g_object_class_install_property(gobject_class, PROP_RETURN_TYPE, spec); } /** @@ -395,42 +476,1039 @@ ggandiva_function_node_get_parameters(GGandivaFunctionNode *node) return priv->parameters; } -G_END_DECLS -std::shared_ptr -ggandiva_node_get_raw(GGandivaNode *node) +G_DEFINE_TYPE(GGandivaLiteralNode, + ggandiva_literal_node, + GGANDIVA_TYPE_NODE) + +static void +ggandiva_literal_node_init(GGandivaLiteralNode *literal_node) { - auto priv = GGANDIVA_NODE_GET_PRIVATE(node); - return priv->node; } -GGandivaFieldNode * -ggandiva_field_node_new_raw(std::shared_ptr *gandiva_node, - GArrowField *field) +static void +ggandiva_literal_node_class_init(GGandivaLiteralNodeClass *klass) { - auto field_node = g_object_new(GGANDIVA_TYPE_FIELD_NODE, - "node", gandiva_node, - "field", field, - NULL); - return GGANDIVA_FIELD_NODE(field_node); } -GGandivaFunctionNode * -ggandiva_function_node_new_raw(std::shared_ptr *gandiva_node, - const gchar *name, - GList *parameters, - GArrowDataType *return_type) + +G_DEFINE_TYPE(GGandivaNullLiteralNode, + ggandiva_null_literal_node, + GGANDIVA_TYPE_LITERAL_NODE) + +static void +ggandiva_null_literal_node_init(GGandivaNullLiteralNode *null_literal_node) { - auto function_node = g_object_new(GGANDIVA_TYPE_FUNCTION_NODE, - "node", gandiva_node, - "name", name, - "return-type", return_type, - NULL); - auto priv = GGANDIVA_FUNCTION_NODE_GET_PRIVATE(function_node); - for (auto node = parameters; node; node = g_list_next(node)) { - auto parameter = GGANDIVA_NODE(node->data); - priv->parameters = g_list_prepend(priv->parameters, g_object_ref(parameter)); +} + +static void +ggandiva_null_literal_node_class_init(GGandivaNullLiteralNodeClass *klass) +{ +} + +/** + * ggandiva_null_literal_node_new: + * @return_type: A #GArrowDataType. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GGandivaNullLiteralNode for + * the type or %NULL on error. + * + * Since: 0.12.0 + */ +GGandivaNullLiteralNode * +ggandiva_null_literal_node_new(GArrowDataType *return_type, + GError **error) +{ + auto arrow_return_type = garrow_data_type_get_raw(return_type); + auto gandiva_node = gandiva::TreeExprBuilder::MakeNull(arrow_return_type); + if (!gandiva_node) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "[gandiva][null-literal-node][new] " + "failed to create: <%s>", + arrow_return_type->ToString().c_str()); + return NULL; } - priv->parameters = g_list_reverse(priv->parameters); - return GGANDIVA_FUNCTION_NODE(function_node); + return GGANDIVA_NULL_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node, + return_type)); +} + + +G_DEFINE_TYPE(GGandivaBooleanLiteralNode, + ggandiva_boolean_literal_node, + GGANDIVA_TYPE_LITERAL_NODE) + +static void +ggandiva_boolean_literal_node_init(GGandivaBooleanLiteralNode *boolean_literal_node) +{ +} + +static void +ggandiva_boolean_literal_node_class_init(GGandivaBooleanLiteralNodeClass *klass) +{ +} + +/** + * ggandiva_boolean_literal_node_new: + * @value: The value of the boolean literal. + * + * Returns: A newly created #GGandivaBooleanLiteralNode. 
+ * + * Since: 0.12.0 + */ +GGandivaBooleanLiteralNode * +ggandiva_boolean_literal_node_new(gboolean value) +{ + auto gandiva_node = gandiva::TreeExprBuilder::MakeLiteral(static_cast(value)); + return GGANDIVA_BOOLEAN_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node, + NULL)); +} + +/** + * ggandiva_boolean_literal_node_get_value: + * @node: A #GGandivaBooleanLiteralNode. + * + * Returns: The value of the boolean literal. + * + * Since: 0.12.0 + */ +gboolean +ggandiva_boolean_literal_node_get_value(GGandivaBooleanLiteralNode *node) +{ + auto value = ggandiva_literal_node_get(GGANDIVA_LITERAL_NODE(node)); + return static_cast(value); +} + + +G_DEFINE_TYPE(GGandivaInt8LiteralNode, + ggandiva_int8_literal_node, + GGANDIVA_TYPE_LITERAL_NODE) + +static void +ggandiva_int8_literal_node_init(GGandivaInt8LiteralNode *int8_literal_node) +{ +} + +static void +ggandiva_int8_literal_node_class_init(GGandivaInt8LiteralNodeClass *klass) +{ +} + +/** + * ggandiva_int8_literal_node_new: + * @value: The value of the 8-bit integer literal. + * + * Returns: A newly created #GGandivaInt8LiteralNode. + * + * Since: 0.12.0 + */ +GGandivaInt8LiteralNode * +ggandiva_int8_literal_node_new(gint8 value) +{ + auto gandiva_node = gandiva::TreeExprBuilder::MakeLiteral(value); + return GGANDIVA_INT8_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node, + NULL)); +} + +/** + * ggandiva_int8_literal_node_get_value: + * @node: A #GGandivaInt8LiteralNode. + * + * Returns: The value of the 8-bit integer literal. + * + * Since: 0.12.0 + */ +gint8 +ggandiva_int8_literal_node_get_value(GGandivaInt8LiteralNode *node) +{ + return ggandiva_literal_node_get(GGANDIVA_LITERAL_NODE(node)); +} + + +G_DEFINE_TYPE(GGandivaUInt8LiteralNode, + ggandiva_uint8_literal_node, + GGANDIVA_TYPE_LITERAL_NODE) + +static void +ggandiva_uint8_literal_node_init(GGandivaUInt8LiteralNode *uint8_literal_node) +{ +} + +static void +ggandiva_uint8_literal_node_class_init(GGandivaUInt8LiteralNodeClass *klass) +{ +} + +/** + * ggandiva_uint8_literal_node_new: + * @value: The value of the 8-bit unsigned integer literal. + * + * Returns: A newly created #GGandivaUInt8LiteralNode. + * + * Since: 0.12.0 + */ +GGandivaUInt8LiteralNode * +ggandiva_uint8_literal_node_new(guint8 value) +{ + auto gandiva_node = gandiva::TreeExprBuilder::MakeLiteral(value); + return GGANDIVA_UINT8_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node, + NULL)); +} + +/** + * ggandiva_uint8_literal_node_get_value: + * @node: A #GGandivaUInt8LiteralNode. + * + * Returns: The value of the 8-bit unsigned integer literal. + * + * Since: 0.12.0 + */ +guint8 +ggandiva_uint8_literal_node_get_value(GGandivaUInt8LiteralNode *node) +{ + return ggandiva_literal_node_get(GGANDIVA_LITERAL_NODE(node)); +} + + +G_DEFINE_TYPE(GGandivaInt16LiteralNode, + ggandiva_int16_literal_node, + GGANDIVA_TYPE_LITERAL_NODE) + +static void +ggandiva_int16_literal_node_init(GGandivaInt16LiteralNode *int16_literal_node) +{ +} + +static void +ggandiva_int16_literal_node_class_init(GGandivaInt16LiteralNodeClass *klass) +{ +} + +/** + * ggandiva_int16_literal_node_new: + * @value: The value of the 16-bit integer literal. + * + * Returns: A newly created #GGandivaInt16LiteralNode. 
+ * + * Since: 0.12.0 + */ +GGandivaInt16LiteralNode * +ggandiva_int16_literal_node_new(gint16 value) +{ + auto gandiva_node = gandiva::TreeExprBuilder::MakeLiteral(value); + return GGANDIVA_INT16_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node, + NULL)); +} + +/** + * ggandiva_int16_literal_node_get_value: + * @node: A #GGandivaInt16LiteralNode. + * + * Returns: The value of the 16-bit integer literal. + * + * Since: 0.12.0 + */ +gint16 +ggandiva_int16_literal_node_get_value(GGandivaInt16LiteralNode *node) +{ + return ggandiva_literal_node_get(GGANDIVA_LITERAL_NODE(node)); +} + + +G_DEFINE_TYPE(GGandivaUInt16LiteralNode, + ggandiva_uint16_literal_node, + GGANDIVA_TYPE_LITERAL_NODE) + +static void +ggandiva_uint16_literal_node_init(GGandivaUInt16LiteralNode *uint16_literal_node) +{ +} + +static void +ggandiva_uint16_literal_node_class_init(GGandivaUInt16LiteralNodeClass *klass) +{ +} + +/** + * ggandiva_uint16_literal_node_new: + * @value: The value of the 16-bit unsigned integer literal. + * + * Returns: A newly created #GGandivaUInt16LiteralNode. + * + * Since: 0.12.0 + */ +GGandivaUInt16LiteralNode * +ggandiva_uint16_literal_node_new(guint16 value) +{ + auto gandiva_node = gandiva::TreeExprBuilder::MakeLiteral(value); + return GGANDIVA_UINT16_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node, + NULL)); +} + +/** + * ggandiva_uint16_literal_node_get_value: + * @node: A #GGandivaUInt16LiteralNode. + * + * Returns: The value of the 16-bit unsigned integer literal. + * + * Since: 0.12.0 + */ +guint16 +ggandiva_uint16_literal_node_get_value(GGandivaUInt16LiteralNode *node) +{ + return ggandiva_literal_node_get(GGANDIVA_LITERAL_NODE(node)); +} + + +G_DEFINE_TYPE(GGandivaInt32LiteralNode, + ggandiva_int32_literal_node, + GGANDIVA_TYPE_LITERAL_NODE) + +static void +ggandiva_int32_literal_node_init(GGandivaInt32LiteralNode *int32_literal_node) +{ +} + +static void +ggandiva_int32_literal_node_class_init(GGandivaInt32LiteralNodeClass *klass) +{ +} + +/** + * ggandiva_int32_literal_node_new: + * @value: The value of the 32-bit integer literal. + * + * Returns: A newly created #GGandivaInt32LiteralNode. + * + * Since: 0.12.0 + */ +GGandivaInt32LiteralNode * +ggandiva_int32_literal_node_new(gint32 value) +{ + auto gandiva_node = gandiva::TreeExprBuilder::MakeLiteral(value); + return GGANDIVA_INT32_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node, + NULL)); +} + +/** + * ggandiva_int32_literal_node_get_value: + * @node: A #GGandivaInt32LiteralNode. + * + * Returns: The value of the 32-bit integer literal. + * + * Since: 0.12.0 + */ +gint32 +ggandiva_int32_literal_node_get_value(GGandivaInt32LiteralNode *node) +{ + return ggandiva_literal_node_get(GGANDIVA_LITERAL_NODE(node)); +} + + +G_DEFINE_TYPE(GGandivaUInt32LiteralNode, + ggandiva_uint32_literal_node, + GGANDIVA_TYPE_LITERAL_NODE) + +static void +ggandiva_uint32_literal_node_init(GGandivaUInt32LiteralNode *uint32_literal_node) +{ +} + +static void +ggandiva_uint32_literal_node_class_init(GGandivaUInt32LiteralNodeClass *klass) +{ +} + +/** + * ggandiva_uint32_literal_node_new: + * @value: The value of the 32-bit unsigned integer literal. + * + * Returns: A newly created #GGandivaUInt32LiteralNode. 
+ *
+ * Since: 0.12.0
+ */
+GGandivaUInt32LiteralNode *
+ggandiva_uint32_literal_node_new(guint32 value)
+{
+  auto gandiva_node = gandiva::TreeExprBuilder::MakeLiteral(value);
+  return GGANDIVA_UINT32_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node,
+                                                                    NULL));
+}
+
+/**
+ * ggandiva_uint32_literal_node_get_value:
+ * @node: A #GGandivaUInt32LiteralNode.
+ *
+ * Returns: The value of the 32-bit unsigned integer literal.
+ *
+ * Since: 0.12.0
+ */
+guint32
+ggandiva_uint32_literal_node_get_value(GGandivaUInt32LiteralNode *node)
+{
+  return ggandiva_literal_node_get<guint32>(GGANDIVA_LITERAL_NODE(node));
+}
+
+
+G_DEFINE_TYPE(GGandivaInt64LiteralNode,
+              ggandiva_int64_literal_node,
+              GGANDIVA_TYPE_LITERAL_NODE)
+
+static void
+ggandiva_int64_literal_node_init(GGandivaInt64LiteralNode *int64_literal_node)
+{
+}
+
+static void
+ggandiva_int64_literal_node_class_init(GGandivaInt64LiteralNodeClass *klass)
+{
+}
+
+/**
+ * ggandiva_int64_literal_node_new:
+ * @value: The value of the 64-bit integer literal.
+ *
+ * Returns: A newly created #GGandivaInt64LiteralNode.
+ *
+ * Since: 0.12.0
+ */
+GGandivaInt64LiteralNode *
+ggandiva_int64_literal_node_new(gint64 value)
+{
+  auto gandiva_node = gandiva::TreeExprBuilder::MakeLiteral(value);
+  return GGANDIVA_INT64_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node,
+                                                                   NULL));
+}
+
+/**
+ * ggandiva_int64_literal_node_get_value:
+ * @node: A #GGandivaInt64LiteralNode.
+ *
+ * Returns: The value of the 64-bit integer literal.
+ *
+ * Since: 0.12.0
+ */
+gint64
+ggandiva_int64_literal_node_get_value(GGandivaInt64LiteralNode *node)
+{
+  return ggandiva_literal_node_get<gint64>(GGANDIVA_LITERAL_NODE(node));
+}
+
+
+G_DEFINE_TYPE(GGandivaUInt64LiteralNode,
+              ggandiva_uint64_literal_node,
+              GGANDIVA_TYPE_LITERAL_NODE)
+
+static void
+ggandiva_uint64_literal_node_init(GGandivaUInt64LiteralNode *uint64_literal_node)
+{
+}
+
+static void
+ggandiva_uint64_literal_node_class_init(GGandivaUInt64LiteralNodeClass *klass)
+{
+}
+
+/**
+ * ggandiva_uint64_literal_node_new:
+ * @value: The value of the 64-bit unsigned integer literal.
+ *
+ * Returns: A newly created #GGandivaUInt64LiteralNode.
+ *
+ * Since: 0.12.0
+ */
+GGandivaUInt64LiteralNode *
+ggandiva_uint64_literal_node_new(guint64 value)
+{
+  auto gandiva_node = gandiva::TreeExprBuilder::MakeLiteral(value);
+  return GGANDIVA_UINT64_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node,
+                                                                    NULL));
+}
+
+/**
+ * ggandiva_uint64_literal_node_get_value:
+ * @node: A #GGandivaUInt64LiteralNode.
+ *
+ * Returns: The value of the 64-bit unsigned integer literal.
+ *
+ * Since: 0.12.0
+ */
+guint64
+ggandiva_uint64_literal_node_get_value(GGandivaUInt64LiteralNode *node)
+{
+  return ggandiva_literal_node_get<guint64>(GGANDIVA_LITERAL_NODE(node));
+}
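[Editor's note: a minimal, hypothetical C sketch of the literal-node API added above; it is not part of the patch and assumes gandiva-glib is built and installed. It only uses constructors and getters declared in this change.]

/* Build an int32 literal and read the value back. */
#include <gandiva-glib/gandiva-glib.h>

static void
literal_example(void)
{
  GGandivaInt32LiteralNode *literal = ggandiva_int32_literal_node_new(29);
  /* The getter unwraps the underlying gandiva::LiteralNode holder. */
  gint32 value = ggandiva_int32_literal_node_get_value(literal);
  g_print("literal value: %d\n", value);
  g_object_unref(literal);
}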
+
+
+G_DEFINE_TYPE(GGandivaFloatLiteralNode,
+              ggandiva_float_literal_node,
+              GGANDIVA_TYPE_LITERAL_NODE)
+
+static void
+ggandiva_float_literal_node_init(GGandivaFloatLiteralNode *float_literal_node)
+{
+}
+
+static void
+ggandiva_float_literal_node_class_init(GGandivaFloatLiteralNodeClass *klass)
+{
+}
+
+/**
+ * ggandiva_float_literal_node_new:
+ * @value: The value of the 32-bit floating point literal.
+ *
+ * Returns: A newly created #GGandivaFloatLiteralNode.
+ *
+ * Since: 0.12.0
+ */
+GGandivaFloatLiteralNode *
+ggandiva_float_literal_node_new(gfloat value)
+{
+  auto gandiva_node = gandiva::TreeExprBuilder::MakeLiteral(value);
+  return GGANDIVA_FLOAT_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node,
+                                                                   NULL));
+}
+
+/**
+ * ggandiva_float_literal_node_get_value:
+ * @node: A #GGandivaFloatLiteralNode.
+ *
+ * Returns: The value of the 32-bit floating point literal.
+ *
+ * Since: 0.12.0
+ */
+gfloat
+ggandiva_float_literal_node_get_value(GGandivaFloatLiteralNode *node)
+{
+  return ggandiva_literal_node_get<gfloat>(GGANDIVA_LITERAL_NODE(node));
+}
+
+
+G_DEFINE_TYPE(GGandivaDoubleLiteralNode,
+              ggandiva_double_literal_node,
+              GGANDIVA_TYPE_LITERAL_NODE)
+
+static void
+ggandiva_double_literal_node_init(GGandivaDoubleLiteralNode *double_literal_node)
+{
+}
+
+static void
+ggandiva_double_literal_node_class_init(GGandivaDoubleLiteralNodeClass *klass)
+{
+}
+
+/**
+ * ggandiva_double_literal_node_new:
+ * @value: The value of the 64-bit floating point literal.
+ *
+ * Returns: A newly created #GGandivaDoubleLiteralNode.
+ *
+ * Since: 0.12.0
+ */
+GGandivaDoubleLiteralNode *
+ggandiva_double_literal_node_new(gdouble value)
+{
+  auto gandiva_node = gandiva::TreeExprBuilder::MakeLiteral(value);
+  return GGANDIVA_DOUBLE_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node,
+                                                                    NULL));
+}
+
+/**
+ * ggandiva_double_literal_node_get_value:
+ * @node: A #GGandivaDoubleLiteralNode.
+ *
+ * Returns: The value of the 64-bit floating point literal.
+ *
+ * Since: 0.12.0
+ */
+gdouble
+ggandiva_double_literal_node_get_value(GGandivaDoubleLiteralNode *node)
+{
+  return ggandiva_literal_node_get<gdouble>(GGANDIVA_LITERAL_NODE(node));
+}
+
+
+typedef struct GGandivaBinaryLiteralNodePrivate_ {
+  GBytes *value;
+} GGandivaBinaryLiteralNodePrivate;
+
+G_DEFINE_TYPE_WITH_PRIVATE(GGandivaBinaryLiteralNode,
+                           ggandiva_binary_literal_node,
+                           GGANDIVA_TYPE_LITERAL_NODE)
+
+#define GGANDIVA_BINARY_LITERAL_NODE_GET_PRIVATE(object)        \
+  static_cast<GGandivaBinaryLiteralNodePrivate *>(              \
+    ggandiva_binary_literal_node_get_instance_private(          \
+      GGANDIVA_BINARY_LITERAL_NODE(object)))
+
+static void
+ggandiva_binary_literal_node_dispose(GObject *object)
+{
+  auto priv = GGANDIVA_BINARY_LITERAL_NODE_GET_PRIVATE(object);
+
+  if (priv->value) {
+    g_bytes_unref(priv->value);
+    priv->value = nullptr;
+  }
+
+  G_OBJECT_CLASS(ggandiva_binary_literal_node_parent_class)->dispose(object);
+}
+
+static void
+ggandiva_binary_literal_node_init(GGandivaBinaryLiteralNode *binary_literal_node)
+{
+}
+
+static void
+ggandiva_binary_literal_node_class_init(GGandivaBinaryLiteralNodeClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->dispose = ggandiva_binary_literal_node_dispose;
+}
+
+/**
+ * ggandiva_binary_literal_node_new:
+ * @value: (array length=size): The value of the binary literal.
+ * @size: The number of bytes of the value.
+ *
+ * Returns: A newly created #GGandivaBinaryLiteralNode.
+ *
+ * Since: 0.12.0
+ */
+GGandivaBinaryLiteralNode *
+ggandiva_binary_literal_node_new(const guint8 *value,
+                                 gsize size)
+{
+  auto gandiva_node =
+    gandiva::TreeExprBuilder::MakeBinaryLiteral(
+      std::string(reinterpret_cast<const char *>(value), size));
+  return GGANDIVA_BINARY_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node,
+                                                                    NULL));
+}
+
+/**
+ * ggandiva_binary_literal_node_new_bytes:
+ * @value: The value of the binary literal.
+ *
+ * Returns: A newly created #GGandivaBinaryLiteralNode.
+ *
+ * Since: 0.12.0
+ */
+GGandivaBinaryLiteralNode *
+ggandiva_binary_literal_node_new_bytes(GBytes *value)
+{
+  size_t value_size;
+  auto raw_value = g_bytes_get_data(value, &value_size);
+  auto gandiva_node =
+    gandiva::TreeExprBuilder::MakeBinaryLiteral(
+      std::string(reinterpret_cast<const char *>(raw_value),
+                  value_size));
+  auto literal_node = ggandiva_literal_node_new_raw(&gandiva_node,
+                                                    NULL);
+  auto priv = GGANDIVA_BINARY_LITERAL_NODE_GET_PRIVATE(literal_node);
+  priv->value = value;
+  g_bytes_ref(priv->value);
+  return GGANDIVA_BINARY_LITERAL_NODE(literal_node);
+}
+
+/**
+ * ggandiva_binary_literal_node_get_value:
+ * @node: A #GGandivaBinaryLiteralNode.
+ *
+ * Returns: (transfer none): The value of the binary literal.
+ *
+ * Since: 0.12.0
+ */
+GBytes *
+ggandiva_binary_literal_node_get_value(GGandivaBinaryLiteralNode *node)
+{
+  auto priv = GGANDIVA_BINARY_LITERAL_NODE_GET_PRIVATE(node);
+  if (!priv->value) {
+    auto value = ggandiva_literal_node_get<std::string>(GGANDIVA_LITERAL_NODE(node));
+    priv->value = g_bytes_new(value.data(), value.size());
+  }
+
+  return priv->value;
+}
+
+
+G_DEFINE_TYPE(GGandivaStringLiteralNode,
+              ggandiva_string_literal_node,
+              GGANDIVA_TYPE_LITERAL_NODE)
+
+static void
+ggandiva_string_literal_node_init(GGandivaStringLiteralNode *string_literal_node)
+{
+}
+
+static void
+ggandiva_string_literal_node_class_init(GGandivaStringLiteralNodeClass *klass)
+{
+}
+
+/**
+ * ggandiva_string_literal_node_new:
+ * @value: The value of the UTF-8 encoded string literal.
+ *
+ * Returns: A newly created #GGandivaStringLiteralNode.
+ *
+ * Since: 0.12.0
+ */
+GGandivaStringLiteralNode *
+ggandiva_string_literal_node_new(const gchar *value)
+{
+  auto gandiva_node = gandiva::TreeExprBuilder::MakeStringLiteral(value);
+  return GGANDIVA_STRING_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node,
+                                                                    NULL));
+}
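[Editor's note: a hypothetical C sketch, not part of the patch, showing the design choice behind the GBytes constructor: the node keeps a reference to the caller's GBytes, so the getter can return the same instance without copying.]

#include <gandiva-glib/gandiva-glib.h>

static void
binary_literal_example(void)
{
  const guint8 raw[] = {0x01, 0x02, 0x03};
  GBytes *bytes = g_bytes_new(raw, sizeof(raw));
  GGandivaBinaryLiteralNode *literal =
    ggandiva_binary_literal_node_new_bytes(bytes);
  /* (transfer none): the node still owns the returned GBytes. */
  GBytes *value = ggandiva_binary_literal_node_get_value(literal);
  g_assert(g_bytes_equal(bytes, value));
  g_object_unref(literal);
  g_bytes_unref(bytes);
}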
+
+/**
+ * ggandiva_string_literal_node_get_value:
+ * @node: A #GGandivaStringLiteralNode.
+ *
+ * Returns: The value of the UTF-8 encoded string literal.
+ *
+ * Since: 0.12.0
+ */
+const gchar *
+ggandiva_string_literal_node_get_value(GGandivaStringLiteralNode *node)
+{
+  const auto &value = ggandiva_literal_node_get<std::string>(GGANDIVA_LITERAL_NODE(node));
+  return value.c_str();
+}
+
+
+typedef struct GGandivaIfNodePrivate_ {
+  GGandivaNode *condition_node;
+  GGandivaNode *then_node;
+  GGandivaNode *else_node;
+} GGandivaIfNodePrivate;
+
+enum {
+  PROP_CONDITION_NODE = 1,
+  PROP_THEN_NODE,
+  PROP_ELSE_NODE,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GGandivaIfNode,
+                           ggandiva_if_node,
+                           GGANDIVA_TYPE_NODE)
+
+#define GGANDIVA_IF_NODE_GET_PRIVATE(object)    \
+  static_cast<GGandivaIfNodePrivate *>(         \
+    ggandiva_if_node_get_instance_private(      \
+      GGANDIVA_IF_NODE(object)))
+
+static void
+ggandiva_if_node_dispose(GObject *object)
+{
+  auto priv = GGANDIVA_IF_NODE_GET_PRIVATE(object);
+
+  if (priv->condition_node) {
+    g_object_unref(priv->condition_node);
+    priv->condition_node = nullptr;
+  }
+
+  if (priv->then_node) {
+    g_object_unref(priv->then_node);
+    priv->then_node = nullptr;
+  }
+
+  if (priv->else_node) {
+    g_object_unref(priv->else_node);
+    priv->else_node = nullptr;
+  }
+
+  G_OBJECT_CLASS(ggandiva_if_node_parent_class)->dispose(object);
+}
+
+static void
+ggandiva_if_node_set_property(GObject *object,
+                              guint prop_id,
+                              const GValue *value,
+                              GParamSpec *pspec)
+{
+  auto priv = GGANDIVA_IF_NODE_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_CONDITION_NODE:
+    priv->condition_node = GGANDIVA_NODE(g_value_dup_object(value));
+    break;
+  case PROP_THEN_NODE:
+    priv->then_node = GGANDIVA_NODE(g_value_dup_object(value));
+    break;
+  case PROP_ELSE_NODE:
+    priv->else_node = GGANDIVA_NODE(g_value_dup_object(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+ggandiva_if_node_get_property(GObject *object,
+                              guint prop_id,
+                              GValue *value,
+                              GParamSpec *pspec)
+{
+  auto priv = GGANDIVA_IF_NODE_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_CONDITION_NODE:
+    g_value_set_object(value, priv->condition_node);
+    break;
+  case PROP_THEN_NODE:
+    g_value_set_object(value, priv->then_node);
+    break;
+  case PROP_ELSE_NODE:
+    g_value_set_object(value, priv->else_node);
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+ggandiva_if_node_init(GGandivaIfNode *if_node)
+{
+}
+
+static void
+ggandiva_if_node_class_init(GGandivaIfNodeClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->dispose      = ggandiva_if_node_dispose;
+  gobject_class->set_property = ggandiva_if_node_set_property;
+  gobject_class->get_property = ggandiva_if_node_get_property;
+
+  GParamSpec *spec;
+  spec = g_param_spec_object("condition-node",
+                             "Condition node",
+                             "The condition node",
+                             GGANDIVA_TYPE_NODE,
+                             static_cast<GParamFlags>(G_PARAM_READWRITE |
+                                                      G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_CONDITION_NODE, spec);
+
+  spec = g_param_spec_object("then-node",
+                             "Then node",
+                             "The then node",
+                             GGANDIVA_TYPE_NODE,
+                             static_cast<GParamFlags>(G_PARAM_READWRITE |
+                                                      G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_THEN_NODE, spec);
+
+  spec = g_param_spec_object("else-node",
+                             "Else node",
+                             "The else node",
+                             GGANDIVA_TYPE_NODE,
+                             static_cast<GParamFlags>(G_PARAM_READWRITE |
+                                                      G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_ELSE_NODE, spec);
+}
+
+/**
+ * ggandiva_if_node_new:
+ * @condition_node: the node with the condition for the if-else expression.
+ * @then_node: the node in case the condition node is true.
+ * @else_node: the node in case the condition node is false.
+ * @return_type: A #GArrowDataType.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable): A newly created #GGandivaIfNode or %NULL on error.
+ *
+ * Since: 0.12.0
+ */
+GGandivaIfNode *
+ggandiva_if_node_new(GGandivaNode *condition_node,
+                     GGandivaNode *then_node,
+                     GGandivaNode *else_node,
+                     GArrowDataType *return_type,
+                     GError **error)
+{
+  if (!condition_node || !then_node || !else_node || !return_type) {
+    /* TODO: Improve error message to show which arguments are invalid. */
+    g_set_error(error,
+                GARROW_ERROR,
+                GARROW_ERROR_INVALID,
+                "[gandiva][if-literal-node][new] "
+                "all arguments must not be NULL");
+    return NULL;
+  }
+  auto gandiva_condition_node = ggandiva_node_get_raw(condition_node);
+  auto gandiva_then_node = ggandiva_node_get_raw(then_node);
+  auto gandiva_else_node = ggandiva_node_get_raw(else_node);
+  auto arrow_return_type = garrow_data_type_get_raw(return_type);
+  auto gandiva_node = gandiva::TreeExprBuilder::MakeIf(gandiva_condition_node,
+                                                       gandiva_then_node,
+                                                       gandiva_else_node,
+                                                       arrow_return_type);
+  if (!gandiva_node) {
+    g_set_error(error,
+                GARROW_ERROR,
+                GARROW_ERROR_INVALID,
+                "[gandiva][if-literal-node][new] "
+                "failed to create: if (<%s>) {<%s>} else {<%s>} -> <%s>",
+                gandiva_condition_node->ToString().c_str(),
+                gandiva_then_node->ToString().c_str(),
+                gandiva_else_node->ToString().c_str(),
+                arrow_return_type->ToString().c_str());
+    return NULL;
+  }
+  return ggandiva_if_node_new_raw(&gandiva_node,
+                                  condition_node,
+                                  then_node,
+                                  else_node,
+                                  return_type);
+}
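[Editor's note: a hypothetical C sketch of the if-node API above, not part of the patch. It builds the constant expression if (true) { 1 } else { 0 } as an int32 tree; garrow_int32_data_type_new() is assumed from arrow-glib.]

static GGandivaIfNode *
if_node_example(GError **error)
{
  GGandivaNode *condition =
    GGANDIVA_NODE(ggandiva_boolean_literal_node_new(TRUE));
  GGandivaNode *then_node =
    GGANDIVA_NODE(ggandiva_int32_literal_node_new(1));
  GGandivaNode *else_node =
    GGANDIVA_NODE(ggandiva_int32_literal_node_new(0));
  GArrowDataType *return_type =
    GARROW_DATA_TYPE(garrow_int32_data_type_new());
  /* The if-node keeps its own references to the child nodes. */
  GGandivaIfNode *if_node = ggandiva_if_node_new(condition,
                                                 then_node,
                                                 else_node,
                                                 return_type,
                                                 error);
  g_object_unref(condition);
  g_object_unref(then_node);
  g_object_unref(else_node);
  g_object_unref(return_type);
  return if_node;
}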
+
+G_END_DECLS
+
+std::shared_ptr<gandiva::Node>
+ggandiva_node_get_raw(GGandivaNode *node)
+{
+  auto priv = GGANDIVA_NODE_GET_PRIVATE(node);
+  return priv->node;
+}
+
+GGandivaFieldNode *
+ggandiva_field_node_new_raw(std::shared_ptr<gandiva::Node> *gandiva_node,
+                            GArrowField *field)
+{
+  auto arrow_return_type = (*gandiva_node)->return_type();
+  auto return_type = garrow_field_get_data_type(field);
+  auto field_node = g_object_new(GGANDIVA_TYPE_FIELD_NODE,
+                                 "node", gandiva_node,
+                                 "field", field,
+                                 "return-type", return_type,
+                                 NULL);
+  return GGANDIVA_FIELD_NODE(field_node);
+}
+
+GGandivaFunctionNode *
+ggandiva_function_node_new_raw(std::shared_ptr<gandiva::Node> *gandiva_node,
+                               const gchar *name,
+                               GList *parameters,
+                               GArrowDataType *return_type)
+{
+  auto function_node = g_object_new(GGANDIVA_TYPE_FUNCTION_NODE,
+                                    "node", gandiva_node,
+                                    "name", name,
+                                    "return-type", return_type,
+                                    NULL);
+  auto priv = GGANDIVA_FUNCTION_NODE_GET_PRIVATE(function_node);
+  for (auto node = parameters; node; node = g_list_next(node)) {
+    auto parameter = GGANDIVA_NODE(node->data);
+    priv->parameters = g_list_prepend(priv->parameters, g_object_ref(parameter));
+  }
+  priv->parameters = g_list_reverse(priv->parameters);
+  return GGANDIVA_FUNCTION_NODE(function_node);
+}
+
+GGandivaLiteralNode *
+ggandiva_literal_node_new_raw(std::shared_ptr<gandiva::Node> *gandiva_node,
+                              GArrowDataType *return_type)
+{
+  auto gandiva_literal_node =
+    std::static_pointer_cast<gandiva::LiteralNode>(*gandiva_node);
+
+  GGandivaLiteralNode *literal_node;
+  if (gandiva_literal_node->is_null()) {
+    literal_node =
+      GGANDIVA_LITERAL_NODE(g_object_new(GGANDIVA_TYPE_NULL_LITERAL_NODE,
+                                         "node", gandiva_node,
+                                         "return-type", return_type,
+                                         NULL));
+  } else {
+    GType type;
+
+    auto arrow_return_type = gandiva_literal_node->return_type();
+    switch (arrow_return_type->id()) {
+    case arrow::Type::BOOL:
+      type = GGANDIVA_TYPE_BOOLEAN_LITERAL_NODE;
+      break;
+    case arrow::Type::UINT8:
+      type = GGANDIVA_TYPE_UINT8_LITERAL_NODE;
+      break;
+    case arrow::Type::UINT16:
+      type = GGANDIVA_TYPE_UINT16_LITERAL_NODE;
+      break;
+    case arrow::Type::UINT32:
+      type = GGANDIVA_TYPE_UINT32_LITERAL_NODE;
+      break;
+    case arrow::Type::UINT64:
+      type = GGANDIVA_TYPE_UINT64_LITERAL_NODE;
+      break;
+    case arrow::Type::INT8:
+      type = GGANDIVA_TYPE_INT8_LITERAL_NODE;
+      break;
+    case arrow::Type::INT16:
+      type = GGANDIVA_TYPE_INT16_LITERAL_NODE;
+      break;
+    case arrow::Type::INT32:
+      type = GGANDIVA_TYPE_INT32_LITERAL_NODE;
+      break;
+    case arrow::Type::INT64:
+      type = GGANDIVA_TYPE_INT64_LITERAL_NODE;
+      break;
+    case arrow::Type::FLOAT:
+      type = GGANDIVA_TYPE_FLOAT_LITERAL_NODE;
+      break;
+    case arrow::Type::DOUBLE:
+      type = GGANDIVA_TYPE_DOUBLE_LITERAL_NODE;
+      break;
+    case arrow::Type::STRING:
+      type = GGANDIVA_TYPE_STRING_LITERAL_NODE;
+      break;
+    case arrow::Type::BINARY:
+      type = GGANDIVA_TYPE_BINARY_LITERAL_NODE;
+      break;
+    default:
+      type = GGANDIVA_TYPE_LITERAL_NODE;
+      break;
+    }
+
+    if (return_type) {
+      literal_node =
+        GGANDIVA_LITERAL_NODE(g_object_new(type,
+                                           "node", gandiva_node,
+                                           "return-type", return_type,
+                                           NULL));
+    } else {
+      return_type = garrow_data_type_new_raw(&arrow_return_type);
+      literal_node =
+        GGANDIVA_LITERAL_NODE(g_object_new(type,
+                                           "node", gandiva_node,
+                                           "return-type", return_type,
+                                           NULL));
+      g_object_unref(return_type);
+    }
+  }
+
+  return literal_node;
+}
+
+GGandivaIfNode *
+ggandiva_if_node_new_raw(std::shared_ptr<gandiva::Node> *gandiva_node,
+                         GGandivaNode *condition_node,
+                         GGandivaNode *then_node,
+                         GGandivaNode *else_node,
+                         GArrowDataType *return_type)
+{
+  auto if_node = g_object_new(GGANDIVA_TYPE_IF_NODE,
+                              "node", gandiva_node,
+                              "condition-node", condition_node,
+                              "then-node", then_node,
+                              "else-node", else_node,
+                              "return-type", return_type,
+                              NULL);
+  return GGANDIVA_IF_NODE(if_node);
+}
diff --git a/c_glib/gandiva-glib/node.h b/c_glib/gandiva-glib/node.h
index 98ab3afb6ae8f..ffcf41da10b21 100644
--- a/c_glib/gandiva-glib/node.h
+++ b/c_glib/gandiva-glib/node.h
@@ -35,6 +35,7 @@ struct _GGandivaNodeClass
   GObjectClass parent_class;
 };
 
+
 #define GGANDIVA_TYPE_FIELD_NODE (ggandiva_field_node_get_type())
 G_DECLARE_DERIVABLE_TYPE(GGandivaFieldNode,
                          ggandiva_field_node,
@@ -67,4 +68,275 @@ ggandiva_function_node_new(const gchar *name,
 GList *
 ggandiva_function_node_get_parameters(GGandivaFunctionNode *node);
 
+
+#define GGANDIVA_TYPE_LITERAL_NODE (ggandiva_literal_node_get_type())
+G_DECLARE_DERIVABLE_TYPE(GGandivaLiteralNode,
+                         ggandiva_literal_node,
+                         GGANDIVA,
+                         LITERAL_NODE,
+                         GGandivaNode)
+struct _GGandivaLiteralNodeClass
+{
+  GGandivaNodeClass parent_class;
+};
+
+
+#define GGANDIVA_TYPE_NULL_LITERAL_NODE (ggandiva_null_literal_node_get_type())
+G_DECLARE_DERIVABLE_TYPE(GGandivaNullLiteralNode,
+                         ggandiva_null_literal_node,
+                         GGANDIVA,
+                         NULL_LITERAL_NODE,
+                         GGandivaLiteralNode)
+struct _GGandivaNullLiteralNodeClass
+{
+  GGandivaLiteralNodeClass parent_class;
+};
+
+GGandivaNullLiteralNode *
+ggandiva_null_literal_node_new(GArrowDataType *return_type,
+                               GError **error);
+
+
+#define GGANDIVA_TYPE_BOOLEAN_LITERAL_NODE (ggandiva_boolean_literal_node_get_type())
+G_DECLARE_DERIVABLE_TYPE(GGandivaBooleanLiteralNode,
+                         ggandiva_boolean_literal_node,
+                         GGANDIVA,
+                         BOOLEAN_LITERAL_NODE,
+                         GGandivaLiteralNode)
+struct _GGandivaBooleanLiteralNodeClass
+{
+  GGandivaLiteralNodeClass parent_class;
+};
+
+GGandivaBooleanLiteralNode *
+ggandiva_boolean_literal_node_new(gboolean value);
+gboolean
+ggandiva_boolean_literal_node_get_value(GGandivaBooleanLiteralNode *node); + + +#define GGANDIVA_TYPE_INT8_LITERAL_NODE (ggandiva_int8_literal_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaInt8LiteralNode, + ggandiva_int8_literal_node, + GGANDIVA, + INT8_LITERAL_NODE, + GGandivaLiteralNode) +struct _GGandivaInt8LiteralNodeClass +{ + GGandivaLiteralNodeClass parent_class; +}; + +GGandivaInt8LiteralNode * +ggandiva_int8_literal_node_new(gint8 value); +gint8 +ggandiva_int8_literal_node_get_value(GGandivaInt8LiteralNode *node); + + +#define GGANDIVA_TYPE_UINT8_LITERAL_NODE (ggandiva_uint8_literal_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaUInt8LiteralNode, + ggandiva_uint8_literal_node, + GGANDIVA, + UINT8_LITERAL_NODE, + GGandivaLiteralNode) +struct _GGandivaUInt8LiteralNodeClass +{ + GGandivaLiteralNodeClass parent_class; +}; + +GGandivaUInt8LiteralNode * +ggandiva_uint8_literal_node_new(guint8 value); +guint8 +ggandiva_uint8_literal_node_get_value(GGandivaUInt8LiteralNode *node); + + +#define GGANDIVA_TYPE_INT16_LITERAL_NODE (ggandiva_int16_literal_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaInt16LiteralNode, + ggandiva_int16_literal_node, + GGANDIVA, + INT16_LITERAL_NODE, + GGandivaLiteralNode) +struct _GGandivaInt16LiteralNodeClass +{ + GGandivaLiteralNodeClass parent_class; +}; + +GGandivaInt16LiteralNode * +ggandiva_int16_literal_node_new(gint16 value); +gint16 +ggandiva_int16_literal_node_get_value(GGandivaInt16LiteralNode *node); + + +#define GGANDIVA_TYPE_UINT16_LITERAL_NODE (ggandiva_uint16_literal_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaUInt16LiteralNode, + ggandiva_uint16_literal_node, + GGANDIVA, + UINT16_LITERAL_NODE, + GGandivaLiteralNode) +struct _GGandivaUInt16LiteralNodeClass +{ + GGandivaLiteralNodeClass parent_class; +}; + +GGandivaUInt16LiteralNode * +ggandiva_uint16_literal_node_new(guint16 value); +guint16 +ggandiva_uint16_literal_node_get_value(GGandivaUInt16LiteralNode *node); + + +#define GGANDIVA_TYPE_INT32_LITERAL_NODE (ggandiva_int32_literal_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaInt32LiteralNode, + ggandiva_int32_literal_node, + GGANDIVA, + INT32_LITERAL_NODE, + GGandivaLiteralNode) +struct _GGandivaInt32LiteralNodeClass +{ + GGandivaLiteralNodeClass parent_class; +}; + +GGandivaInt32LiteralNode * +ggandiva_int32_literal_node_new(gint32 value); +gint32 +ggandiva_int32_literal_node_get_value(GGandivaInt32LiteralNode *node); + + +#define GGANDIVA_TYPE_UINT32_LITERAL_NODE (ggandiva_uint32_literal_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaUInt32LiteralNode, + ggandiva_uint32_literal_node, + GGANDIVA, + UINT32_LITERAL_NODE, + GGandivaLiteralNode) +struct _GGandivaUInt32LiteralNodeClass +{ + GGandivaLiteralNodeClass parent_class; +}; + +GGandivaUInt32LiteralNode * +ggandiva_uint32_literal_node_new(guint32 value); +guint32 +ggandiva_uint32_literal_node_get_value(GGandivaUInt32LiteralNode *node); + + +#define GGANDIVA_TYPE_INT64_LITERAL_NODE (ggandiva_int64_literal_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaInt64LiteralNode, + ggandiva_int64_literal_node, + GGANDIVA, + INT64_LITERAL_NODE, + GGandivaLiteralNode) +struct _GGandivaInt64LiteralNodeClass +{ + GGandivaLiteralNodeClass parent_class; +}; + +GGandivaInt64LiteralNode * +ggandiva_int64_literal_node_new(gint64 value); +gint64 +ggandiva_int64_literal_node_get_value(GGandivaInt64LiteralNode *node); + + +#define GGANDIVA_TYPE_UINT64_LITERAL_NODE (ggandiva_uint64_literal_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaUInt64LiteralNode, + 
ggandiva_uint64_literal_node, + GGANDIVA, + UINT64_LITERAL_NODE, + GGandivaLiteralNode) +struct _GGandivaUInt64LiteralNodeClass +{ + GGandivaLiteralNodeClass parent_class; +}; + +GGandivaUInt64LiteralNode * +ggandiva_uint64_literal_node_new(guint64 value); +guint64 +ggandiva_uint64_literal_node_get_value(GGandivaUInt64LiteralNode *node); + + +#define GGANDIVA_TYPE_FLOAT_LITERAL_NODE (ggandiva_float_literal_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaFloatLiteralNode, + ggandiva_float_literal_node, + GGANDIVA, + FLOAT_LITERAL_NODE, + GGandivaLiteralNode) +struct _GGandivaFloatLiteralNodeClass +{ + GGandivaLiteralNodeClass parent_class; +}; + +GGandivaFloatLiteralNode * +ggandiva_float_literal_node_new(gfloat value); +gfloat +ggandiva_float_literal_node_get_value(GGandivaFloatLiteralNode *node); + + +#define GGANDIVA_TYPE_DOUBLE_LITERAL_NODE (ggandiva_double_literal_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaDoubleLiteralNode, + ggandiva_double_literal_node, + GGANDIVA, + DOUBLE_LITERAL_NODE, + GGandivaLiteralNode) +struct _GGandivaDoubleLiteralNodeClass +{ + GGandivaLiteralNodeClass parent_class; +}; + +GGandivaDoubleLiteralNode * +ggandiva_double_literal_node_new(gdouble value); +gdouble +ggandiva_double_literal_node_get_value(GGandivaDoubleLiteralNode *node); + + +#define GGANDIVA_TYPE_BINARY_LITERAL_NODE (ggandiva_binary_literal_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaBinaryLiteralNode, + ggandiva_binary_literal_node, + GGANDIVA, + BINARY_LITERAL_NODE, + GGandivaLiteralNode) +struct _GGandivaBinaryLiteralNodeClass +{ + GGandivaLiteralNodeClass parent_class; +}; + +GGandivaBinaryLiteralNode * +ggandiva_binary_literal_node_new(const guint8 *value, + gsize size); +GGandivaBinaryLiteralNode * +ggandiva_binary_literal_node_new_bytes(GBytes *value); +GBytes * +ggandiva_binary_literal_node_get_value(GGandivaBinaryLiteralNode *node); + + +#define GGANDIVA_TYPE_STRING_LITERAL_NODE (ggandiva_string_literal_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaStringLiteralNode, + ggandiva_string_literal_node, + GGANDIVA, + STRING_LITERAL_NODE, + GGandivaLiteralNode) +struct _GGandivaStringLiteralNodeClass +{ + GGandivaLiteralNodeClass parent_class; +}; + +GGandivaStringLiteralNode * +ggandiva_string_literal_node_new(const gchar *value); +const gchar * +ggandiva_string_literal_node_get_value(GGandivaStringLiteralNode *node); + + +#define GGANDIVA_TYPE_IF_NODE (ggandiva_if_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaIfNode, + ggandiva_if_node, + GGANDIVA, + IF_NODE, + GGandivaNode) +struct _GGandivaIfNodeClass +{ + GGandivaNodeClass parent_class; +}; + +GGandivaIfNode * +ggandiva_if_node_new(GGandivaNode *condition_node, + GGandivaNode *then_node, + GGandivaNode *else_node, + GArrowDataType *return_type, + GError **error); + G_END_DECLS diff --git a/c_glib/gandiva-glib/node.hpp b/c_glib/gandiva-glib/node.hpp index 953c214beb9d6..9a6ae98058699 100644 --- a/c_glib/gandiva-glib/node.hpp +++ b/c_glib/gandiva-glib/node.hpp @@ -21,6 +21,7 @@ #include +#include #include #include @@ -34,3 +35,12 @@ ggandiva_function_node_new_raw(std::shared_ptr *gandiva_node, const gchar *name, GList *parameters, GArrowDataType *return_type); +GGandivaLiteralNode * +ggandiva_literal_node_new_raw(std::shared_ptr *gandiva_node, + GArrowDataType *return_type); +GGandivaIfNode * +ggandiva_if_node_new_raw(std::shared_ptr *gandiva_node, + GGandivaNode *condition_node, + GGandivaNode *then_node, + GGandivaNode *else_node, + GArrowDataType *return_type); diff --git a/c_glib/meson.build 
b/c_glib/meson.build
index 14136056d408c..7113534ec5915 100644
--- a/c_glib/meson.build
+++ b/c_glib/meson.build
@@ -23,8 +23,7 @@ project('arrow-glib', 'c', 'cpp',
           'cpp_std=c++11',
         ])
 
-python = find_program('python', 'python3', 'python2')
-version = run_command(python, 'tool/get-version.py').stdout().strip()
+version = '0.13.0-SNAPSHOT'
 if version.endswith('-SNAPSHOT')
   version_numbers = version.split('-')[0].split('.')
   version_tag = version.split('-')[1]
@@ -64,7 +63,7 @@ endif
 if arrow_cpp_build_lib_dir == ''
   arrow = dependency('arrow')
   have_arrow_orc = dependency('arrow-orc', required: false).found()
-  arrow_gpu = dependency('arrow-gpu', required: false)
+  arrow_cuda = dependency('arrow-cuda', required: false)
   gandiva = dependency('gandiva', required: false)
   parquet = dependency('parquet', required: false)
   plasma = dependency('plasma', required: false)
@@ -89,9 +88,9 @@ main(void)
   have_arrow_orc = cpp_compiler.links(arrow_orc_code,
                                       include_directories: base_include_directories,
                                       dependencies: [arrow])
-  arrow_gpu = cpp_compiler.find_library('arrow_gpu',
-                                        dirs: [arrow_cpp_build_lib_dir],
-                                        required: false)
+  arrow_cuda = cpp_compiler.find_library('arrow_cuda',
+                                         dirs: [arrow_cpp_build_lib_dir],
+                                         required: false)
   gandiva = cpp_compiler.find_library('gandiva',
                                       dirs: [arrow_cpp_build_lib_dir],
                                       required: false)
@@ -104,8 +103,8 @@ main(void)
 endif
 
 subdir('arrow-glib')
-if arrow_gpu.found()
-  subdir('arrow-gpu-glib')
+if arrow_cuda.found()
+  subdir('arrow-cuda-glib')
 endif
 if gandiva.found()
   subdir('gandiva-glib')
@@ -136,7 +135,7 @@ test('unit test',
      run_test,
      env: [
        'ARROW_GLIB_TYPELIB_DIR=@0@/arrow-glib'.format(meson.build_root()),
-       'ARROW_GPU_GLIB_TYPELIB_DIR=@0@/arrow-gpu-glib'.format(meson.build_root()),
+       'ARROW_CUDA_GLIB_TYPELIB_DIR=@0@/arrow-cuda-glib'.format(meson.build_root()),
        'GANDIVA_GLIB_TYPELIB_DIR=@0@/gandiva-glib'.format(meson.build_root()),
        'PARQUET_GLIB_TYPELIB_DIR=@0@/parquet-glib'.format(meson.build_root()),
        'PARQUET_GLIB_TYPELIB_DIR=@0@/plasma-glib'.format(meson.build_root()),
diff --git a/c_glib/parquet-glib/arrow-file-reader.cpp b/c_glib/parquet-glib/arrow-file-reader.cpp
index 398e85b02c08a..5c16e827fc14b 100644
--- a/c_glib/parquet-glib/arrow-file-reader.cpp
+++ b/c_glib/parquet-glib/arrow-file-reader.cpp
@@ -310,8 +310,8 @@ gparquet_arrow_file_reader_read_column(GParquetArrowFileReader *reader,
     return NULL;
   }
 
-  std::shared_ptr<arrow::Array> arrow_array;
-  status = parquet_arrow_file_reader->ReadColumn(column_index, &arrow_array);
+  std::shared_ptr<arrow::ChunkedArray> arrow_chunked_array;
+  status = parquet_arrow_file_reader->ReadColumn(column_index, &arrow_chunked_array);
   if (!garrow_error_check(error,
                           status,
                           "[parquet][arrow][file-reader][read-column]")) {
@@ -319,7 +319,7 @@
   }
 
   auto arrow_field = arrow_schema->field(0);
-  auto arrow_column = std::make_shared<arrow::Column>(arrow_field, arrow_array);
+  auto arrow_column = std::make_shared<arrow::Column>(arrow_field, arrow_chunked_array);
   return garrow_column_new_raw(&arrow_column);
 }
 
diff --git a/c_glib/plasma-glib/Makefile.am b/c_glib/plasma-glib/Makefile.am
index f797c97b094c4..d14638bc22764 100644
--- a/c_glib/plasma-glib/Makefile.am
+++ b/c_glib/plasma-glib/Makefile.am
@@ -23,13 +23,39 @@ EXTRA_DIST = \
 
 AM_CPPFLAGS = \
 	-I$(top_builddir) \
-	-I$(top_srcdir)
+	-I$(top_srcdir) \
+	-DG_LOG_DOMAIN=\"Plasma\"
 
 AM_CFLAGS = \
 	$(GLIB_CFLAGS) \
 	$(GARROW_CFLAGS) \
 	$(GPLASMA_CFLAGS)
 
+PLASMA_ARROW_CUDA_LIBS =
+PLASMA_INTROSPECTION_COMPILER_ARROW_CUDA_ARGS =
+PLASMA_GIR_ARROW_CUDA_PACKAGE =
+PLASMA_GIR_ARROW_CUDA_SCANNER_ADD_INCLUDE_PATH = +PLASMA_GIR_ARROW_CUDA_LIBS_MACOS = +PLASMA_GIR_ARROW_CUDA_SCANNER_LIBRARY_PATH_MACOS = +PLASMA_GIR_ARROW_CUDA_LIBS = +if HAVE_ARROW_CUDA +PLASMA_ARROW_CUDA_LIBS += \ + $(ARROW_CUDA_LIBS) \ + ../arrow-cuda-glib/libarrow-cuda-glib.la +PLASMA_INTROSPECTION_COMPILER_ARROW_CUDA_ARGS += \ + --includedir=$(abs_top_builddir)/arrow-cuda-glib +PLASMA_GIR_ARROW_CUDA_PACKAGE += \ + arrow-cuda-glib +PLASMA_GIR_ARROW_CUDA_SCANNER_ADD_INCLUDE_PATH += \ + --add-include-path=$(abs_top_builddir)/arrow-cuda-glib +PLASMA_GIR_ARROW_CUDA_LIBS_MACOS += \ + arrow-cuda-glib +PLASMA_GIR_ARROW_CUDA_SCANNER_LIBRARY_PATH_MACOS += \ + --library-path=$(abs_top_builddir)/arrow-cuda-glib/.libs +PLASMA_GIR_ARROW_CUDA_LIBS += \ + $(abs_top_builddir)/arrow-cuda-glib/libarrow-cuda-glib.la +endif + if HAVE_PLASMA lib_LTLIBRARIES = \ libplasma-glib.la @@ -49,18 +75,22 @@ libplasma_glib_la_LIBADD = \ $(GLIB_LIBS) \ $(ARROW_LIBS) \ $(PLASMA_LIBS) \ - ../arrow-glib/libarrow-glib.la + ../arrow-glib/libarrow-glib.la \ + $(PLASMA_ARROW_CUDA_LIBS) libplasma_glib_la_headers = \ client.h \ + object.h \ plasma-glib.h libplasma_glib_la_sources = \ client.cpp \ + object.cpp \ $(libplasma_glib_la_headers) -libplasma_glib_la_cpp_headers = \ +libplasma_glib_la_cpp_headers = \ client.hpp \ + object.hpp \ plasma-glib.hpp libplasma_glib_la_SOURCES = \ @@ -68,7 +98,7 @@ libplasma_glib_la_SOURCES = \ $(libplasma_glib_la_cpp_headers) plasma_glib_includedir = $(includedir)/plasma-glib -plasma_glib_include_HEADERS = \ +plasma_glib_include_HEADERS = \ $(libplasma_glib_la_headers) \ $(libplasma_glib_la_cpp_headers) @@ -84,17 +114,19 @@ INTROSPECTION_SCANNER_ARGS = INTROSPECTION_SCANNER_ENV = if USE_ARROW_BUILD_DIR INTROSPECTION_SCANNER_ENV += \ - PKG_CONFIG_PATH=${abs_top_builddir}/arrow-glib:$(ARROW_BUILD_DIR)/src/arrow:$${PKG_CONFIG_PATH} + PKG_CONFIG_PATH=$(abs_top_builddir)/arrow-glib$(PLASMA_ARROW_CUDA_PKG_CONFIG_PATH):$(ARROW_BUILD_DIR)/src/arrow:$${PKG_CONFIG_PATH} else INTROSPECTION_SCANNER_ENV += \ - PKG_CONFIG_PATH=${abs_top_builddir}/arrow-glib:$${PKG_CONFIG_PATH} + PKG_CONFIG_PATH=$(abs_top_builddir)/arrow-glib$(PLASMA_ARROW_CUDA_PKG_CONFIG_PATH):$${PKG_CONFIG_PATH} endif -INTROSPECTION_COMPILER_ARGS = \ - --includedir=$(abs_top_builddir)/arrow-glib +INTROSPECTION_COMPILER_ARGS = \ + --includedir=$(abs_top_builddir)/arrow-glib \ + $(PLASMA_INTROSPECTION_COMPILER_ARROW_CUDA_INCLUDEDIR) Plasma-1.0.gir: libplasma-glib.la Plasma_1_0_gir_PACKAGES = \ - arrow-glib + arrow-glib \ + $(PLASMA_GIR_ARROW_CUDA_PACKAGE) Plasma_1_0_gir_EXPORT_PACKAGES = \ plasma-glib Plasma_1_0_gir_INCLUDES = \ @@ -103,8 +135,9 @@ Plasma_1_0_gir_CFLAGS = \ $(AM_CPPFLAGS) Plasma_1_0_gir_LIBS = Plasma_1_0_gir_FILES = $(libplasma_glib_la_sources) -Plasma_1_0_gir_SCANNERFLAGS = \ +Plasma_1_0_gir_SCANNERFLAGS = \ --add-include-path=$(abs_top_builddir)/arrow-glib \ + $(PLASMA_GIR_ARROW_CUDA_SCANNER_ADD_INCLUDE_PATH) \ --library-path=$(ARROW_LIB_DIR) \ --warn-all \ --identifier-prefix=GPlasma \ @@ -112,14 +145,17 @@ Plasma_1_0_gir_SCANNERFLAGS = \ if OS_MACOS Plasma_1_0_gir_LIBS += \ arrow-glib \ + $(PLASMA_GIR_ARROW_CUDA_LIBS_MACOS) \ plasma-glib Plasma_1_0_gir_SCANNERFLAGS += \ --no-libtool \ --library-path=$(abs_top_builddir)/arrow-glib/.libs \ + $(PLASMA_GIR_ARROW_CUDA_SCANNER_LIBRARY_PATH_MACOS) \ --library-path=$(abs_builddir)/.libs else Plasma_1_0_gir_LIBS += \ $(abs_top_builddir)/arrow-glib/libarrow-glib.la \ + $(PLASMA_GIR_ARROW_CUDA_LIBS) \ libplasma-glib.la endif INTROSPECTION_GIRS += Plasma-1.0.gir diff --git 
a/c_glib/plasma-glib/client.cpp b/c_glib/plasma-glib/client.cpp
index f818c971dea91..2038ea61f042a 100644
--- a/c_glib/plasma-glib/client.cpp
+++ b/c_glib/plasma-glib/client.cpp
@@ -21,47 +21,327 @@
 #  include <config.h>
 #endif
 
+#include <arrow-glib/buffer.hpp>
 #include <arrow-glib/error.hpp>
 
+#ifdef HAVE_ARROW_CUDA
+#  include <arrow-cuda-glib/cuda.hpp>
+#endif
+
 #include <plasma-glib/client.hpp>
+#include <plasma-glib/object.hpp>
 
 G_BEGIN_DECLS
 
 /**
  * SECTION: client
- * @title: Client classes
+ * @section_id: client-classes
+ * @title: Client related classes
  * @include: plasma-glib/plasma-glib.h
  *
- * #GPlasmaClient is a class for an interface with a plasma store
- * and a plasma manager.
+ * #GPlasmaClientOptions is a class for customizing the plasma store
+ * connection.
+ *
+ * #GPlasmaClientCreateOptions is a class for customizing object creation.
+ *
+ * #GPlasmaClient is a class for an interface with a plasma store.
+ *
+ * Since: 0.12.0
+ */
+
+typedef struct GPlasmaClientOptionsPrivate_ {
+  gint n_retries;
+} GPlasmaClientOptionsPrivate;
+
+enum {
+  PROP_N_RETRIES = 1
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GPlasmaClientOptions,
+                           gplasma_client_options,
+                           G_TYPE_OBJECT)
+
+#define GPLASMA_CLIENT_OPTIONS_GET_PRIVATE(object)      \
+  static_cast<GPlasmaClientOptionsPrivate *>(           \
+    gplasma_client_options_get_instance_private(        \
+      GPLASMA_CLIENT_OPTIONS(object)))
+
+static void
+gplasma_client_options_set_property(GObject *object,
+                                    guint prop_id,
+                                    const GValue *value,
+                                    GParamSpec *pspec)
+{
+  auto priv = GPLASMA_CLIENT_OPTIONS_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_N_RETRIES:
+    priv->n_retries = g_value_get_int(value);
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gplasma_client_options_get_property(GObject *object,
+                                    guint prop_id,
+                                    GValue *value,
+                                    GParamSpec *pspec)
+{
+  auto priv = GPLASMA_CLIENT_OPTIONS_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_N_RETRIES:
+    g_value_set_int(value, priv->n_retries);
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gplasma_client_options_init(GPlasmaClientOptions *object)
+{
+}
+
+static void
+gplasma_client_options_class_init(GPlasmaClientOptionsClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->set_property = gplasma_client_options_set_property;
+  gobject_class->get_property = gplasma_client_options_get_property;
+
+  GParamSpec *spec;
+  spec = g_param_spec_int("n-retries",
+                          "N retries",
+                          "The number of retries to connect to the plasma store. "
+                          "-1 means that the system default value is used.",
+                          -1,
+                          G_MAXINT,
+                          -1,
+                          static_cast<GParamFlags>(G_PARAM_READWRITE |
+                                                   G_PARAM_CONSTRUCT));
+  g_object_class_install_property(gobject_class, PROP_N_RETRIES, spec);
+}
+
+/**
+ * gplasma_client_options_new:
+ *
+ * Returns: A newly created #GPlasmaClientOptions.
+ *
+ * Since: 0.12.0
+ */
+GPlasmaClientOptions *
+gplasma_client_options_new(void)
+{
+  auto options = g_object_new(GPLASMA_TYPE_CLIENT_OPTIONS,
+                              NULL);
+  return GPLASMA_CLIENT_OPTIONS(options);
+}
+
+/**
+ * gplasma_client_options_set_n_retries:
+ * @options: A #GPlasmaClientOptions.
+ * @n_retries: The number of retries on connect.
+ *
+ * Since: 0.12.0
+ */
+void
+gplasma_client_options_set_n_retries(GPlasmaClientOptions *options,
+                                     gint n_retries)
+{
+  auto priv = GPLASMA_CLIENT_OPTIONS_GET_PRIVATE(options);
+  priv->n_retries = n_retries;
+}
+
+/**
+ * gplasma_client_options_get_n_retries:
+ * @options: A #GPlasmaClientOptions.
+ *
+ * Returns: The number of retries on connect.
+ *
+ * Since: 0.12.0
+ */
+gint
+gplasma_client_options_get_n_retries(GPlasmaClientOptions *options)
+{
+  auto priv = GPLASMA_CLIENT_OPTIONS_GET_PRIVATE(options);
+  return priv->n_retries;
+}
+
+
+typedef struct GPlasmaClientCreateOptionsPrivate_ {
+  guint8 *metadata;
+  gsize metadata_size;
+  gint gpu_device;
+} GPlasmaClientCreateOptionsPrivate;
+
+enum {
+  PROP_GPU_DEVICE = 1
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GPlasmaClientCreateOptions,
+                           gplasma_client_create_options,
+                           G_TYPE_OBJECT)
+
+#define GPLASMA_CLIENT_CREATE_OPTIONS_GET_PRIVATE(object)       \
+  static_cast<GPlasmaClientCreateOptionsPrivate *>(             \
+    gplasma_client_create_options_get_instance_private(         \
+      GPLASMA_CLIENT_CREATE_OPTIONS(object)))
+
+static void
+gplasma_client_create_options_set_property(GObject *object,
+                                           guint prop_id,
+                                           const GValue *value,
+                                           GParamSpec *pspec)
+{
+  auto priv = GPLASMA_CLIENT_CREATE_OPTIONS_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_GPU_DEVICE:
+    priv->gpu_device = g_value_get_int(value);
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gplasma_client_create_options_get_property(GObject *object,
+                                           guint prop_id,
+                                           GValue *value,
+                                           GParamSpec *pspec)
+{
+  auto priv = GPLASMA_CLIENT_CREATE_OPTIONS_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_GPU_DEVICE:
+    g_value_set_int(value, priv->gpu_device);
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gplasma_client_create_options_init(GPlasmaClientCreateOptions *object)
+{
+}
+
+static void
+gplasma_client_create_options_class_init(GPlasmaClientCreateOptionsClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->set_property = gplasma_client_create_options_set_property;
+  gobject_class->get_property = gplasma_client_create_options_get_property;
+
+  GParamSpec *spec;
+  spec = g_param_spec_int("gpu-device",
+                          "GPU device",
+                          "The GPU device number. -1 means GPU isn't used.",
+                          -1,
+                          G_MAXINT,
+                          -1,
+                          static_cast<GParamFlags>(G_PARAM_READWRITE |
+                                                   G_PARAM_CONSTRUCT));
+  g_object_class_install_property(gobject_class, PROP_GPU_DEVICE, spec);
+}
+
+/**
+ * gplasma_client_create_options_new:
+ *
+ * Returns: A newly created #GPlasmaClientCreateOptions.
+ *
+ * Since: 0.12.0
+ */
+GPlasmaClientCreateOptions *
+gplasma_client_create_options_new(void)
+{
+  auto options = g_object_new(GPLASMA_TYPE_CLIENT_CREATE_OPTIONS,
+                              NULL);
+  return GPLASMA_CLIENT_CREATE_OPTIONS(options);
+}
+
+/**
+ * gplasma_client_create_options_set_metadata:
+ * @options: A #GPlasmaClientCreateOptions.
+ * @metadata: (nullable) (array length=size): The metadata of a created object.
+ * @size: The number of bytes of the metadata.
+ *
+ * Since: 0.12.0
+ */
+void
+gplasma_client_create_options_set_metadata(GPlasmaClientCreateOptions *options,
+                                           const guint8 *metadata,
+                                           gsize size)
+{
+  auto priv = GPLASMA_CLIENT_CREATE_OPTIONS_GET_PRIVATE(options);
+  if (priv->metadata) {
+    g_free(priv->metadata);
+  }
+  priv->metadata = static_cast<guint8 *>(g_memdup(metadata, size));
+  priv->metadata_size = size;
+}
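[Editor's note: a short hypothetical C sketch of the two options objects defined above, not part of the patch. It shows the intended split: connection tuning lives on GPlasmaClientOptions, per-object creation settings on GPlasmaClientCreateOptions.]

static void
options_example(void)
{
  GPlasmaClientOptions *options = gplasma_client_options_new();
  gplasma_client_options_set_n_retries(options, 10);

  GPlasmaClientCreateOptions *create_options =
    gplasma_client_create_options_new();
  const guint8 metadata[] = "user-metadata";
  /* The options object copies the metadata with g_memdup(). */
  gplasma_client_create_options_set_metadata(create_options,
                                             metadata,
                                             sizeof(metadata));

  g_object_unref(create_options);
  g_object_unref(options);
}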
+
+/**
+ * gplasma_client_create_options_get_metadata:
+ * @options: A #GPlasmaClientCreateOptions.
+ * @size: (nullable) (out): The number of bytes of the metadata.
+ *
+ * Returns: (nullable) (array length=size): The metadata of a created object.
+ *
+ * Since: 0.12.0
+ */
+const guint8 *
+gplasma_client_create_options_get_metadata(GPlasmaClientCreateOptions *options,
+                                           gsize *size)
+{
+  auto priv = GPLASMA_CLIENT_CREATE_OPTIONS_GET_PRIVATE(options);
+  if (size) {
+    *size = priv->metadata_size;
+  }
+  return priv->metadata;
+}
+
 typedef struct GPlasmaClientPrivate_ {
-  std::shared_ptr<plasma::PlasmaClient> client;
+  plasma::PlasmaClient *client;
+  bool disconnected;
 } GPlasmaClientPrivate;
 
 enum {
-  PROP_0,
-  PROP_CLIENT
+  PROP_CLIENT = 1
 };
 
 G_DEFINE_TYPE_WITH_PRIVATE(GPlasmaClient,
                            gplasma_client,
                            G_TYPE_OBJECT)
 
-#define GPLASMA_CLIENT_GET_PRIVATE(obj)         \
-  static_cast<GPlasmaClientPrivate *>(          \
-    gplasma_client_get_instance_private(        \
-      GPLASMA_CLIENT(obj)))
+#define GPLASMA_CLIENT_GET_PRIVATE(object)      \
+  static_cast<GPlasmaClientPrivate *>(          \
+    gplasma_client_get_instance_private(        \
+      GPLASMA_CLIENT(object)))
 
 static void
 gplasma_client_finalize(GObject *object)
 {
   auto priv = GPLASMA_CLIENT_GET_PRIVATE(object);
 
-  priv->client = nullptr;
+  if (!priv->disconnected) {
+    auto status = priv->client->Disconnect();
+    if (!status.ok()) {
+      g_warning("[plasma][client][finalize] Failed to disconnect: %s",
+                status.ToString().c_str());
+    }
+  }
+  delete priv->client;
 
   G_OBJECT_CLASS(gplasma_client_parent_class)->finalize(object);
 }
@@ -77,7 +357,7 @@ gplasma_client_set_property(GObject *object,
   switch (prop_id) {
   case PROP_CLIENT:
     priv->client =
-      *static_cast<std::shared_ptr<plasma::PlasmaClient> *>(g_value_get_pointer(value));
+      static_cast<plasma::PlasmaClient *>(g_value_get_pointer(value));
     break;
   default:
     G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
@@ -102,7 +382,7 @@ gplasma_client_class_init(GPlasmaClientClass *klass)
 
   spec = g_param_spec_pointer("client",
                               "Client",
-                              "The raw std::shared_ptr<plasma::PlasmaClient> *",
+                              "The raw plasma::PlasmaClient *",
                               static_cast<GParamFlags>(G_PARAM_WRITABLE |
                                                        G_PARAM_CONSTRUCT_ONLY));
   g_object_class_install_property(gobject_class, PROP_CLIENT, spec);
@@ -111,6 +391,7 @@ gplasma_client_class_init(GPlasmaClientClass *klass)
 /**
  * gplasma_client_new:
  * @store_socket_name: The name of the UNIX domain socket.
+ * @options: (nullable): The options to customize how to connect to the plasma store.
  * @error: (nullable): Return location for a #GError or %NULL.
  *
  * Returns: (nullable): A newly created #GPlasmaClient on success,
@@ -120,21 +401,200 @@
  */
 GPlasmaClient *
 gplasma_client_new(const gchar *store_socket_name,
+                   GPlasmaClientOptions *options,
                    GError **error)
 {
-  auto plasma_client = std::make_shared<plasma::PlasmaClient>();
-  auto status = plasma_client->Connect(store_socket_name, "");
+  auto plasma_client = new plasma::PlasmaClient();
+  int n_retries = -1;
+  if (options) {
+    n_retries = gplasma_client_options_get_n_retries(options);
+  }
+  auto status = plasma_client->Connect(store_socket_name, "", 0, n_retries);
   if (garrow_error_check(error, status, "[plasma][client][new]")) {
-    return gplasma_client_new_raw(&plasma_client);
+    return gplasma_client_new_raw(plasma_client);
+  } else {
+    return NULL;
+  }
+}
+
+/**
+ * gplasma_client_create:
+ * @client: A #GPlasmaClient.
+ * @id: The ID for a newly created object.
+ * @data_size: The number of bytes of data for a newly created object.
+ * @options: (nullable): The options for creating an object.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable) (transfer full): A newly created #GPlasmaCreatedObject
+ *   on success, %NULL on error.
+ *
+ * Since: 0.12.0
+ */
+GPlasmaCreatedObject *
+gplasma_client_create(GPlasmaClient *client,
+                      GPlasmaObjectID *id,
+                      gsize data_size,
+                      GPlasmaClientCreateOptions *options,
+                      GError **error)
+{
+  const auto context = "[plasma][client][create]";
+  auto plasma_client = gplasma_client_get_raw(client);
+  auto plasma_id = gplasma_object_id_get_raw(id);
+  const uint8_t *raw_metadata = nullptr;
+  int64_t raw_metadata_size = 0;
+  int device_number = 0;
+  if (options) {
+    auto options_priv = GPLASMA_CLIENT_CREATE_OPTIONS_GET_PRIVATE(options);
+    raw_metadata = options_priv->metadata;
+    raw_metadata_size = options_priv->metadata_size;
+    if (options_priv->gpu_device >= 0) {
+#ifndef HAVE_ARROW_CUDA
+      g_set_error(error,
+                  GARROW_ERROR,
+                  GARROW_ERROR_INVALID,
+                  "%s Arrow CUDA GLib is needed to use GPU",
+                  context);
+      return NULL;
+#endif
+      device_number = options_priv->gpu_device + 1;
+    }
+  }
+  std::shared_ptr<arrow::Buffer> plasma_data;
+  auto status = plasma_client->Create(plasma_id,
+                                      data_size,
+                                      raw_metadata,
+                                      raw_metadata_size,
+                                      &plasma_data,
+                                      device_number);
+  if (garrow_error_check(error, status, context)) {
+    GArrowBuffer *data = nullptr;
+    if (device_number == 0) {
+      auto plasma_mutable_data =
+        std::static_pointer_cast<arrow::MutableBuffer>(plasma_data);
+      data = GARROW_BUFFER(garrow_mutable_buffer_new_raw(&plasma_mutable_data));
+#ifdef HAVE_ARROW_CUDA
+    } else {
+      auto plasma_cuda_data =
+        std::static_pointer_cast<arrow::cuda::CudaBuffer>(plasma_data);
+      data = GARROW_BUFFER(garrow_cuda_buffer_new_raw(&plasma_cuda_data));
+#endif
+    }
+    GArrowBuffer *metadata = nullptr;
+    if (raw_metadata_size > 0) {
+      auto plasma_metadata =
+        std::make_shared<arrow::Buffer>(raw_metadata, raw_metadata_size);
+      metadata = garrow_buffer_new_raw(&plasma_metadata);
+    }
+    return gplasma_created_object_new_raw(client,
+                                          id,
+                                          data,
+                                          metadata,
+                                          device_number - 1);
+  } else {
+    return NULL;
+  }
+}
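[Editor's note: a hypothetical end-to-end C sketch of the create/seal flow above, not part of the patch. The socket path and the object ID bytes are made-up values, and error handling is elided for brevity.]

static void
create_example(GError **error)
{
  GPlasmaClient *client =
    gplasma_client_new("/tmp/plasma.sock", NULL, error);
  GPlasmaObjectID *id =
    gplasma_object_id_new((const guint8 *)"created-object", 14, error);
  /* Allocate a 100-byte object with default creation options. */
  GPlasmaCreatedObject *object =
    gplasma_client_create(client, id, 100, NULL, error);
  /* ... fill the object's data buffer here ... */
  /* Sealing publishes the object; the handle can't be used afterwards. */
  gplasma_created_object_seal(object, error);
  g_object_unref(object);
  g_object_unref(id);
  g_object_unref(client);
}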
+
+/**
+ * gplasma_client_refer_object:
+ * @client: A #GPlasmaClient.
+ * @id: The ID of the target object.
+ * @timeout_ms: The timeout in milliseconds. -1 means no timeout.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable) (transfer full): A found #GPlasmaReferredObject
+ *   on success, %NULL on error.
+ *
+ * Since: 0.12.0
+ */
+GPlasmaReferredObject *
+gplasma_client_refer_object(GPlasmaClient *client,
+                            GPlasmaObjectID *id,
+                            gint64 timeout_ms,
+                            GError **error)
+{
+  const auto context = "[plasma][client][refer-object]";
+  auto plasma_client = gplasma_client_get_raw(client);
+  auto plasma_id = gplasma_object_id_get_raw(id);
+  std::vector<plasma::ObjectID> plasma_ids;
+  plasma_ids.push_back(plasma_id);
+  std::vector<plasma::ObjectBuffer> plasma_object_buffers;
+  auto status = plasma_client->Get(plasma_ids,
+                                   timeout_ms,
+                                   &plasma_object_buffers);
+  if (garrow_error_check(error, status, context)) {
+    auto plasma_object_buffer = plasma_object_buffers[0];
+    auto plasma_data = plasma_object_buffer.data;
+    auto plasma_metadata = plasma_object_buffer.metadata;
+    GArrowBuffer *data = nullptr;
+    GArrowBuffer *metadata = nullptr;
+    if (plasma_object_buffer.device_num > 0) {
+#ifdef HAVE_ARROW_CUDA
+      std::shared_ptr<arrow::cuda::CudaBuffer> plasma_cuda_data;
+      status = arrow::cuda::CudaBuffer::FromBuffer(plasma_data,
+                                                   &plasma_cuda_data);
+      if (!garrow_error_check(error, status, context)) {
+        return NULL;
+      }
+      std::shared_ptr<arrow::cuda::CudaBuffer> plasma_cuda_metadata;
+      status = arrow::cuda::CudaBuffer::FromBuffer(plasma_metadata,
+                                                   &plasma_cuda_metadata);
+      if (!garrow_error_check(error, status, context)) {
+        return NULL;
+      }
+
+      data = GARROW_BUFFER(garrow_cuda_buffer_new_raw(&plasma_cuda_data));
+      metadata =
+        GARROW_BUFFER(garrow_cuda_buffer_new_raw(&plasma_cuda_metadata));
+#else
+      g_set_error(error,
+                  GARROW_ERROR,
+                  GARROW_ERROR_INVALID,
+                  "%s Arrow CUDA GLib is needed to use GPU",
+                  context);
+      return NULL;
+#endif
+    } else {
+      data = garrow_buffer_new_raw(&plasma_data);
+      metadata = garrow_buffer_new_raw(&plasma_metadata);
+    }
+    return gplasma_referred_object_new_raw(client,
+                                           id,
+                                           data,
+                                           metadata,
+                                           plasma_object_buffer.device_num - 1);
   } else {
     return NULL;
   }
 }
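[Editor's note: a hypothetical C sketch of the lookup path above, not part of the patch. It assumes reading the object's data through the "data" GObject property (a GArrowBuffer), which this change installs as a readable property on GPlasmaObject.]

static void
refer_example(GPlasmaClient *client, GPlasmaObjectID *id, GError **error)
{
  /* timeout_ms = -1: wait until the object is available. */
  GPlasmaReferredObject *object =
    gplasma_client_refer_object(client, id, -1, error);
  if (!object)
    return;
  GArrowBuffer *data = NULL;
  g_object_get(object, "data", &data, NULL);
  /* ... read the buffer contents here ... */
  g_object_unref(data);
  g_object_unref(object);
}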
+
+/**
+ * gplasma_client_disconnect:
+ * @client: A #GPlasmaClient.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.12.0
+ */
+gboolean
+gplasma_client_disconnect(GPlasmaClient *client,
+                          GError **error)
+{
+  auto priv = GPLASMA_CLIENT_GET_PRIVATE(client);
+  auto status = priv->client->Disconnect();
+  if (garrow_error_check(error, status, "[plasma][client][disconnect]")) {
+    priv->disconnected = true;
+    return TRUE;
+  } else {
+    return FALSE;
+  }
+}
+
 G_END_DECLS
 
 GPlasmaClient *
-gplasma_client_new_raw(std::shared_ptr<plasma::PlasmaClient> *plasma_client)
+gplasma_client_new_raw(plasma::PlasmaClient *plasma_client)
 {
   auto client = g_object_new(GPLASMA_TYPE_CLIENT,
                              "client", plasma_client,
@@ -142,7 +602,7 @@ gplasma_client_new_raw(std::shared_ptr<plasma::PlasmaClient> *plasma_client)
   return GPLASMA_CLIENT(client);
 }
 
-std::shared_ptr<plasma::PlasmaClient>
+plasma::PlasmaClient *
 gplasma_client_get_raw(GPlasmaClient *client)
 {
   auto priv = GPLASMA_CLIENT_GET_PRIVATE(client);
diff --git a/c_glib/plasma-glib/client.h b/c_glib/plasma-glib/client.h
index 30c8a81aff7bb..2cb983e14e970 100644
--- a/c_glib/plasma-glib/client.h
+++ b/c_glib/plasma-glib/client.h
@@ -19,10 +19,53 @@
 
 #pragma once
 
-#include <arrow-glib/arrow-glib.h>
+#include <plasma-glib/object.h>
 
 G_BEGIN_DECLS
 
+#define GPLASMA_TYPE_CLIENT_OPTIONS (gplasma_client_options_get_type())
+G_DECLARE_DERIVABLE_TYPE(GPlasmaClientOptions,
+                         gplasma_client_options,
+                         GPLASMA,
+                         CLIENT_OPTIONS,
+                         GObject)
+
+struct _GPlasmaClientOptionsClass
+{
+  GObjectClass parent_class;
+};
+
+GPlasmaClientOptions *gplasma_client_options_new(void);
+void
+gplasma_client_options_set_n_retries(GPlasmaClientOptions *options,
+                                     gint n_retries);
+gint
+gplasma_client_options_get_n_retries(GPlasmaClientOptions *options);
+
+
+#define GPLASMA_TYPE_CLIENT_CREATE_OPTIONS      \
+  (gplasma_client_create_options_get_type())
+G_DECLARE_DERIVABLE_TYPE(GPlasmaClientCreateOptions,
+                         gplasma_client_create_options,
+                         GPLASMA,
+                         CLIENT_CREATE_OPTIONS,
+                         GObject)
+
+struct _GPlasmaClientCreateOptionsClass
+{
+  GObjectClass parent_class;
+};
+
+GPlasmaClientCreateOptions *gplasma_client_create_options_new(void);
+void
+gplasma_client_create_options_set_metadata(GPlasmaClientCreateOptions *options,
+                                           const guint8 *metadata,
+                                           gsize size);
+const guint8 *
+gplasma_client_create_options_get_metadata(GPlasmaClientCreateOptions *options,
+                                           gsize *size);
+
+
 #define GPLASMA_TYPE_CLIENT (gplasma_client_get_type())
 G_DECLARE_DERIVABLE_TYPE(GPlasmaClient,
                          gplasma_client,
@@ -36,6 +79,20 @@ struct _GPlasmaClientClass
 };
 
 GPlasmaClient *gplasma_client_new(const gchar *store_socket_name,
+                                  GPlasmaClientOptions *options,
                                   GError **error);
+GPlasmaCreatedObject *
+gplasma_client_create(GPlasmaClient *client,
+                      GPlasmaObjectID *id,
+                      gsize data_size,
+                      GPlasmaClientCreateOptions *options,
+                      GError **error);
+GPlasmaReferredObject *
+gplasma_client_refer_object(GPlasmaClient *client,
+                            GPlasmaObjectID *id,
+                            gint64 timeout_ms,
+                            GError **error);
+gboolean gplasma_client_disconnect(GPlasmaClient *client,
+                                   GError **error);
 
 G_END_DECLS
diff --git a/c_glib/plasma-glib/client.hpp b/c_glib/plasma-glib/client.hpp
index 473ea16ae4444..d3e2ab2598d2a 100644
--- a/c_glib/plasma-glib/client.hpp
+++ b/c_glib/plasma-glib/client.hpp
@@ -19,11 +19,11 @@
 
 #pragma once
 
-#include <memory>
-
 #include <plasma/client.h>
 
 #include <plasma-glib/client.h>
 
-GPlasmaClient *gplasma_client_new_raw(std::shared_ptr<plasma::PlasmaClient> *plasma_client);
-std::shared_ptr<plasma::PlasmaClient> gplasma_client_get_raw(GPlasmaClient *client);
+GPlasmaClient *
+gplasma_client_new_raw(plasma::PlasmaClient *plasma_client);
+plasma::PlasmaClient *
+gplasma_client_get_raw(GPlasmaClient *client);
diff --git a/c_glib/plasma-glib/meson.build b/c_glib/plasma-glib/meson.build
index 40a20e9c7d006..75ebce870dba8 100644
---
a/c_glib/plasma-glib/meson.build +++ b/c_glib/plasma-glib/meson.build @@ -21,15 +21,18 @@ project_name = 'plasma-glib' sources = files( 'client.cpp', + 'object.cpp', ) c_headers = files( 'client.h', + 'object.h', 'plasma-glib.h', ) cpp_headers = files( 'client.hpp', + 'object.hpp', 'plasma-glib.hpp', ) @@ -41,13 +44,39 @@ dependencies = [ plasma, arrow_glib, ] +cpp_args = [ + '-DG_LOG_DOMAIN="Plasma"', +] +pkg_config_requires = [ + 'plasma', + 'arrow-glib', +] +gir_dependencies = [ + declare_dependency(sources: arrow_glib_gir), +] +gir_includes = [ + 'Arrow-1.0', +] +gir_extra_args = [ + '--warn-all', + '--include-uninstalled=./arrow-glib/Arrow-1.0.gir', +] +if arrow_cuda.found() + dependencies += [arrow_cuda_glib] + cpp_args += ['-DHAVE_ARROW_CUDA'] + pkg_config_requires += ['arrow-cuda-glib'] + gir_dependencies += [declare_dependency(sources: arrow_cuda_glib_gir)] + gir_includes += ['ArrowCUDA-1.0'] + gir_extra_args += ['--include-uninstalled=./arrow-cuda-glib/ArrowCUDA-1.0.gir'] +endif libplasma_glib = library('plasma-glib', - sources: sources, - install: true, - dependencies: dependencies, - include_directories: base_include_directories, - soversion: so_version, - version: library_version) + sources: sources, + install: true, + dependencies: dependencies, + include_directories: base_include_directories, + cpp_args: cpp_args, + soversion: so_version, + version: library_version) plasma_glib = declare_dependency(link_with: libplasma_glib, include_directories: base_include_directories, dependencies: dependencies) @@ -56,22 +85,17 @@ pkgconfig.generate(filebase: project_name, name: 'Apache Arrow Plasma GLib', description: 'C API for Apache Arrow Plasma based on GLib', version: version, - requires: ['plasma', 'arrow-glib'], + requires: pkg_config_requires, libraries: [libplasma_glib]) gnome.generate_gir(libplasma_glib, - dependencies: declare_dependency(sources: arrow_glib_gir), + dependencies: gir_dependencies, sources: sources + c_headers, namespace: 'Plasma', nsversion: api_version, identifier_prefix: 'GPlasma', symbol_prefix: 'gplasma', export_packages: 'plasma-glib', - includes: [ - 'Arrow-1.0', - ], + includes: gir_includes, install: true, - extra_args: [ - '--warn-all', - '--include-uninstalled=./arrow-glib/Arrow-1.0.gir', - ]) + extra_args: gir_extra_args) diff --git a/c_glib/plasma-glib/object.cpp b/c_glib/plasma-glib/object.cpp new file mode 100644 index 0000000000000..f7afd7231f2e5 --- /dev/null +++ b/c_glib/plasma-glib/object.cpp @@ -0,0 +1,538 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
+#include <string.h>
+
+#include <plasma-glib/client.hpp>
+#include <plasma-glib/object.hpp>
+
+G_BEGIN_DECLS
+
+/**
+ * SECTION: object
+ * @section_id: object-classes
+ * @title: Object related classes
+ * @include: plasma-glib/plasma-glib.h
+ *
+ * #GPlasmaObjectID is a class for an object ID.
+ *
+ * #GPlasmaObject is a base class for an object stored in the plasma store.
+ *
+ * #GPlasmaCreatedObject is a class for a created object. You can
+ * change the data of the object until the object is sealed or aborted.
+ *
+ * #GPlasmaReferredObject is a class for a referred object. You can
+ * only refer to the data and metadata of the object. You can't change
+ * the data of the object.
+ *
+ * Since: 0.12.0
+ */
+
+typedef struct GPlasmaObjectIDPrivate_ {
+  plasma::ObjectID id;
+} GPlasmaObjectIDPrivate;
+
+G_DEFINE_TYPE_WITH_PRIVATE(GPlasmaObjectID,
+                           gplasma_object_id,
+                           G_TYPE_OBJECT)
+
+#define GPLASMA_OBJECT_ID_GET_PRIVATE(object)   \
+  static_cast<GPlasmaObjectIDPrivate *>(        \
+    gplasma_object_id_get_instance_private(     \
+      GPLASMA_OBJECT_ID(object)))
+
+static void
+gplasma_object_id_init(GPlasmaObjectID *object)
+{
+}
+
+static void
+gplasma_object_id_class_init(GPlasmaObjectIDClass *klass)
+{
+}
+
+/**
+ * gplasma_object_id_new:
+ * @id: (array length=size): The raw ID bytes.
+ * @size: The number of bytes of the ID. It must be 1..20.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable): A newly created #GPlasmaObjectID on success,
+ *   %NULL on error.
+ *
+ * Since: 0.12.0
+ */
+GPlasmaObjectID *
+gplasma_object_id_new(const guint8 *id,
+                      gsize size,
+                      GError **error)
+{
+  if (size == 0 || size > plasma::kUniqueIDSize) {
+    g_set_error(error,
+                GARROW_ERROR,
+                GARROW_ERROR_INVALID,
+                "[plasma][object-id][new] "
+                "ID must be 1..20 bytes: <%" G_GSIZE_FORMAT ">",
+                size);
+    return NULL;
+  }
+
+  auto object_id = g_object_new(GPLASMA_TYPE_OBJECT_ID, NULL);
+  auto priv = GPLASMA_OBJECT_ID_GET_PRIVATE(object_id);
+  memcpy(priv->id.mutable_data(), id, size);
+  if (size != plasma::kUniqueIDSize) {
+    memset(priv->id.mutable_data() + size, 0, plasma::kUniqueIDSize - size);
+  }
+  return GPLASMA_OBJECT_ID(object_id);
+}
+
+/**
+ * gplasma_object_id_to_binary:
+ * @id: A #GPlasmaObjectID.
+ * @size: (nullable) (out): The number of bytes of the byte string of
+ *   the object ID. It's always 20. 20 is `plasma::kUniqueIDSize`.
+ *
+ * Returns: (array length=size): The byte string of the object ID.
+ *
+ * Since: 0.12.0
+ */
+const guint8 *
+gplasma_object_id_to_binary(GPlasmaObjectID *id,
+                            gsize *size)
+{
+  auto priv = GPLASMA_OBJECT_ID_GET_PRIVATE(id);
+  if (size) {
+    *size = plasma::kUniqueIDSize;
+  }
+  return priv->id.data();
+}
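[Editor's note: a minimal hypothetical C sketch of the object-ID API above, not part of the patch. It relies on the zero-padding behavior of gplasma_object_id_new() for IDs shorter than the fixed 20-byte plasma::kUniqueIDSize.]

static void
object_id_example(GError **error)
{
  GPlasmaObjectID *id =
    gplasma_object_id_new((const guint8 *)"my-object", 9, error);
  if (!id)
    return;
  /* (transfer full): the hex string must be freed with g_free(). */
  gchar *hex = gplasma_object_id_to_hex(id);
  g_print("object ID: %s\n", hex);
  g_free(hex);
  g_object_unref(id);
}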
+ * + * Since: 0.12.0 + */ +gchar * +gplasma_object_id_to_hex(GPlasmaObjectID *id) +{ + auto priv = GPLASMA_OBJECT_ID_GET_PRIVATE(id); + return g_strdup(priv->id.hex().c_str()); +} + +typedef struct GPlasmaObjectPrivate_ { + GPlasmaClient *client; + GPlasmaObjectID *id; + GArrowBuffer *data; + GArrowBuffer *metadata; + gint gpu_device; +} GPlasmaObjectPrivate; + +enum { + PROP_CLIENT = 1, + PROP_ID, + PROP_DATA, + PROP_METADATA, + PROP_GPU_DEVICE +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GPlasmaObject, + gplasma_object, + G_TYPE_OBJECT) + +#define GPLASMA_OBJECT_GET_PRIVATE(object) \ + static_cast( \ + gplasma_object_get_instance_private( \ + GPLASMA_OBJECT(object))) + +static void +gplasma_object_dispose(GObject *object) +{ + auto priv = GPLASMA_OBJECT_GET_PRIVATE(object); + + // Properties except priv->id must be disposed in subclass. + + if (priv->id) { + g_object_unref(priv->id); + priv->id = nullptr; + } + + G_OBJECT_CLASS(gplasma_object_parent_class)->dispose(object); +} + +static void +gplasma_object_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GPLASMA_OBJECT_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_CLIENT: + priv->client = GPLASMA_CLIENT(g_value_dup_object(value)); + break; + case PROP_ID: + priv->id = GPLASMA_OBJECT_ID(g_value_dup_object(value)); + break; + case PROP_DATA: + priv->data = GARROW_BUFFER(g_value_dup_object(value)); + break; + case PROP_METADATA: + priv->metadata = GARROW_BUFFER(g_value_dup_object(value)); + break; + case PROP_GPU_DEVICE: + priv->gpu_device = g_value_get_int(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gplasma_object_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GPLASMA_OBJECT_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_CLIENT: + g_value_set_object(value, priv->client); + break; + case PROP_ID: + g_value_set_object(value, priv->id); + break; + case PROP_DATA: + g_value_set_object(value, priv->data); + break; + case PROP_METADATA: + g_value_set_object(value, priv->metadata); + break; + case PROP_GPU_DEVICE: + g_value_set_int(value, priv->gpu_device); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gplasma_object_init(GPlasmaObject *object) +{ +} + +static void +gplasma_object_class_init(GPlasmaObjectClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = gplasma_object_dispose; + gobject_class->set_property = gplasma_object_set_property; + gobject_class->get_property = gplasma_object_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("client", + "Client", + "The client", + GPLASMA_TYPE_CLIENT, + static_cast(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_CLIENT, spec); + + spec = g_param_spec_object("id", + "ID", + "The ID of this object", + GPLASMA_TYPE_OBJECT_ID, + static_cast(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_ID, spec); + + spec = g_param_spec_object("data", + "Data", + "The data of this object", + GARROW_TYPE_BUFFER, + static_cast(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_DATA, spec); + + spec = g_param_spec_object("metadata", + "Metadata", + "The metadata of this object", + GARROW_TYPE_BUFFER, + static_cast(G_PARAM_READWRITE | + 
G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_METADATA, spec); + + spec = g_param_spec_int("gpu-device", + "GPU device", + "The GPU device number. -1 means GPU isn't used.", + -1, + G_MAXINT, + -1, + static_cast(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_GPU_DEVICE, spec); +} + +static bool +gplasma_object_check_not_released(GPlasmaObjectPrivate *priv, + GError **error, + const gchar *context) +{ + if (priv->client) { + return true; + } + + auto id_priv = GPLASMA_OBJECT_ID_GET_PRIVATE(priv->id); + auto id_hex = id_priv->id.hex(); + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "%s: Can't process released object: <%s>", + context, + id_hex.c_str()); + return false; +} + +static void +gplasma_object_release_resources(GPlasmaObjectPrivate *priv) +{ + if (priv->client) { + g_object_unref(priv->client); + priv->client = nullptr; + } + + if (priv->data) { + g_object_unref(priv->data); + priv->data = nullptr; + } + + if (priv->metadata) { + g_object_unref(priv->metadata); + priv->metadata = nullptr; + } +} + +G_DEFINE_TYPE(GPlasmaCreatedObject, + gplasma_created_object, + GPLASMA_TYPE_OBJECT) + +static void +gplasma_created_object_dispose(GObject *object) +{ + auto priv = GPLASMA_OBJECT_GET_PRIVATE(object); + + if (priv->client) { + gplasma_created_object_abort(GPLASMA_CREATED_OBJECT(object), NULL); + } + + G_OBJECT_CLASS(gplasma_created_object_parent_class)->dispose(object); +} + +static void +gplasma_created_object_init(GPlasmaCreatedObject *object) +{ +} + +static void +gplasma_created_object_class_init(GPlasmaCreatedObjectClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = gplasma_created_object_dispose; +} + +/** + * gplasma_created_object_seal: + * @object: A #GPlasmaCreatedObject. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Seals the object in the object store. You can't use the sealed + * object anymore. + * + * Returns: %TRUE on success, %FALSE on error. + * + * Since: 0.12.0 + */ +gboolean +gplasma_created_object_seal(GPlasmaCreatedObject *object, + GError **error) +{ + const auto context = "[plasma][created-object][seal]"; + + auto priv = GPLASMA_OBJECT_GET_PRIVATE(object); + if (!gplasma_object_check_not_released(priv, error, context)) { + return FALSE; + } + + auto plasma_client = gplasma_client_get_raw(priv->client); + auto id_priv = GPLASMA_OBJECT_ID_GET_PRIVATE(priv->id); + auto status = plasma_client->Seal(id_priv->id); + auto success = garrow_error_check(error, status, context); + if (success) { + status = plasma_client->Release(id_priv->id); + success = garrow_error_check(error, status, context); + gplasma_object_release_resources(priv); + } + return success; +} + +/** + * gplasma_created_object_abort: + * @object: A #GPlasmaCreatedObject. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Aborts the object in the object store. You can't use the aborted + * object anymore. + * + * Returns: %TRUE on success, %FALSE on error. 
+ * + * Since: 0.12.0 + */ +gboolean +gplasma_created_object_abort(GPlasmaCreatedObject *object, + GError **error) +{ + const auto context = "[plasma][created-object][abort]"; + + auto priv = GPLASMA_OBJECT_GET_PRIVATE(object); + if (!gplasma_object_check_not_released(priv, error, context)) { + return FALSE; + } + + auto plasma_client = gplasma_client_get_raw(priv->client); + auto id_priv = GPLASMA_OBJECT_ID_GET_PRIVATE(priv->id); + auto status = plasma_client->Release(id_priv->id); + auto success = garrow_error_check(error, status, context); + if (success) { + status = plasma_client->Abort(id_priv->id); + success = garrow_error_check(error, status, context); + gplasma_object_release_resources(priv); + } + return success; +} + + +G_DEFINE_TYPE(GPlasmaReferredObject, + gplasma_referred_object, + GPLASMA_TYPE_OBJECT) + +static void +gplasma_referred_object_dispose(GObject *object) +{ + auto priv = GPLASMA_OBJECT_GET_PRIVATE(object); + + gplasma_object_release_resources(priv); + + G_OBJECT_CLASS(gplasma_referred_object_parent_class)->dispose(object); +} + +static void +gplasma_referred_object_init(GPlasmaReferredObject *object) +{ +} + +static void +gplasma_referred_object_class_init(GPlasmaReferredObjectClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = gplasma_referred_object_dispose; +} + +/** + * gplasma_referred_object_release: + * @object: A #GPlasmaReferredObject. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Releases the object explicitly. The object is no longer valid. + * + * Returns: %TRUE on success, %FALSE on error. + * + * Since: 0.12.0 + */ +gboolean +gplasma_referred_object_release(GPlasmaReferredObject *object, + GError **error) +{ + const auto context = "[plasma][referred-object][release]"; + + auto priv = GPLASMA_OBJECT_GET_PRIVATE(object); + if (!gplasma_object_check_not_released(priv, error, context)) { + return FALSE; + } + + gplasma_object_release_resources(priv); + return TRUE; +} + +G_END_DECLS + +plasma::ObjectID +gplasma_object_id_get_raw(GPlasmaObjectID *id) +{ + auto priv = GPLASMA_OBJECT_ID_GET_PRIVATE(id); + return priv->id; +} + +GPlasmaCreatedObject * +gplasma_created_object_new_raw(GPlasmaClient *client, + GPlasmaObjectID *id, + GArrowBuffer *data, + GArrowBuffer *metadata, + gint gpu_device) +{ + auto object = g_object_new(GPLASMA_TYPE_CREATED_OBJECT, + "client", client, + "id", id, + "data", data, + "metadata", metadata, + "gpu-device", gpu_device, + NULL); + return GPLASMA_CREATED_OBJECT(object); +} + +GPlasmaReferredObject * +gplasma_referred_object_new_raw(GPlasmaClient *client, + GPlasmaObjectID *id, + GArrowBuffer *data, + GArrowBuffer *metadata, + gint gpu_device) +{ + auto object = g_object_new(GPLASMA_TYPE_REFERRED_OBJECT, + "client", client, + "id", id, + "data", data, + "metadata", metadata, + "gpu-device", gpu_device, + NULL); + return GPLASMA_REFERRED_OBJECT(object); +} diff --git a/c_glib/plasma-glib/object.h b/c_glib/plasma-glib/object.h new file mode 100644 index 0000000000000..46547d37b46e8 --- /dev/null +++ b/c_glib/plasma-glib/object.h @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow-glib/arrow-glib.h>
+
+G_BEGIN_DECLS
+
+#define GPLASMA_TYPE_OBJECT_ID (gplasma_object_id_get_type())
+G_DECLARE_DERIVABLE_TYPE(GPlasmaObjectID,
+                         gplasma_object_id,
+                         GPLASMA,
+                         OBJECT_ID,
+                         GObject)
+
+struct _GPlasmaObjectIDClass
+{
+  GObjectClass parent_class;
+};
+
+GPlasmaObjectID *gplasma_object_id_new(const guint8 *id,
+                                       gsize size,
+                                       GError **error);
+const guint8 *gplasma_object_id_to_binary(GPlasmaObjectID *id,
+                                          gsize *size);
+gchar *gplasma_object_id_to_hex(GPlasmaObjectID *id);
+
+#define GPLASMA_TYPE_OBJECT (gplasma_object_get_type())
+G_DECLARE_DERIVABLE_TYPE(GPlasmaObject,
+                         gplasma_object,
+                         GPLASMA,
+                         OBJECT,
+                         GObject)
+
+struct _GPlasmaObjectClass
+{
+  GObjectClass parent_class;
+};
+
+#define GPLASMA_TYPE_CREATED_OBJECT (gplasma_created_object_get_type())
+G_DECLARE_DERIVABLE_TYPE(GPlasmaCreatedObject,
+                         gplasma_created_object,
+                         GPLASMA,
+                         CREATED_OBJECT,
+                         GPlasmaObject)
+
+struct _GPlasmaCreatedObjectClass
+{
+  GPlasmaObjectClass parent_class;
+};
+
+gboolean gplasma_created_object_seal(GPlasmaCreatedObject *object,
+                                     GError **error);
+gboolean gplasma_created_object_abort(GPlasmaCreatedObject *object,
+                                      GError **error);
+
+#define GPLASMA_TYPE_REFERRED_OBJECT (gplasma_referred_object_get_type())
+G_DECLARE_DERIVABLE_TYPE(GPlasmaReferredObject,
+                         gplasma_referred_object,
+                         GPLASMA,
+                         REFERRED_OBJECT,
+                         GPlasmaObject)
+
+struct _GPlasmaReferredObjectClass
+{
+  GPlasmaObjectClass parent_class;
+};
+
+gboolean gplasma_referred_object_release(GPlasmaReferredObject *object,
+                                         GError **error);
+
+G_END_DECLS
diff --git a/c_glib/plasma-glib/object.hpp b/c_glib/plasma-glib/object.hpp
new file mode 100644
index 0000000000000..9d598b2ed6b3a
--- /dev/null
+++ b/c_glib/plasma-glib/object.hpp
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <plasma/client.h>
+
+#include <plasma-glib/client.h>
+
+#include <plasma-glib/object.h>
+
+plasma::ObjectID
+gplasma_object_id_get_raw(GPlasmaObjectID *id);
+
+GPlasmaCreatedObject *
+gplasma_created_object_new_raw(GPlasmaClient *client,
+                               GPlasmaObjectID *id,
+                               GArrowBuffer *data,
+                               GArrowBuffer *metadata,
+                               gint gpu_device);
+
+GPlasmaReferredObject *
+gplasma_referred_object_new_raw(GPlasmaClient *client,
+                                GPlasmaObjectID *id,
+                                GArrowBuffer *data,
+                                GArrowBuffer *metadata,
+                                gint gpu_device);
diff --git a/c_glib/plasma-glib/plasma-glib.h b/c_glib/plasma-glib/plasma-glib.h
index 33eed2cc6fc3c..2a6dd76ca35b2 100644
--- a/c_glib/plasma-glib/plasma-glib.h
+++ b/c_glib/plasma-glib/plasma-glib.h
@@ -20,3 +20,4 @@
 #pragma once
 
 #include <plasma-glib/client.h>
+#include <plasma-glib/object.h>
diff --git a/c_glib/plasma-glib/plasma-glib.hpp b/c_glib/plasma-glib/plasma-glib.hpp
index b0af4899ea3de..b2958c28f863d 100644
--- a/c_glib/plasma-glib/plasma-glib.hpp
+++ b/c_glib/plasma-glib/plasma-glib.hpp
@@ -22,3 +22,4 @@
 
 #include <plasma-glib/plasma-glib.h>
 #include <plasma-glib/client.hpp>
+#include <plasma-glib/object.hpp>
diff --git a/c_glib/plasma-glib/plasma-glib.pc.in b/c_glib/plasma-glib/plasma-glib.pc.in
index 21f202c9b06fd..c82fe69580f1f 100644
--- a/c_glib/plasma-glib/plasma-glib.pc.in
+++ b/c_glib/plasma-glib/plasma-glib.pc.in
@@ -25,4 +25,4 @@ Description: C API for Apache Arrow Plasma based on GLib
 Version: @VERSION@
 Libs: -L${libdir} -lplasma-glib
 Cflags: -I${includedir}
-Requires: plasma arrow-glib
+Requires: plasma arrow-glib @ARROW_CUDA_GLIB_PACKAGE@
diff --git a/c_glib/test/gandiva/test-binary-literal-node.rb b/c_glib/test/gandiva/test-binary-literal-node.rb
new file mode 100644
index 0000000000000..fddf74830d4ab
--- /dev/null
+++ b/c_glib/test/gandiva/test-binary-literal-node.rb
@@ -0,0 +1,47 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestGandivaBinaryLiteralNode < Test::Unit::TestCase
+  def setup
+    omit("Gandiva is required") unless defined?(::Gandiva)
+    @value = "\x00\x01\x02\x03\x04"
+  end
+
+  sub_test_case(".new") do
+    def test_string
+      node = Gandiva::BinaryLiteralNode.new(@value)
+      assert_equal(@value, node.value.to_s)
+    end
+
+    def test_bytes
+      bytes_value = GLib::Bytes.new(@value)
+      node = Gandiva::BinaryLiteralNode.new(bytes_value)
+      assert_equal(@value, node.value.to_s)
+    end
+  end
+
+  sub_test_case("instance methods") do
+    def setup
+      super
+      @node = Gandiva::BinaryLiteralNode.new(@value)
+    end
+
+    def test_return_type
+      assert_equal(Arrow::BinaryDataType.new, @node.return_type)
+    end
+  end
+end
diff --git a/c_glib/test/gandiva/test-boolean-literal-node.rb b/c_glib/test/gandiva/test-boolean-literal-node.rb
new file mode 100644
index 0000000000000..6e18a76218595
--- /dev/null
+++ b/c_glib/test/gandiva/test-boolean-literal-node.rb
@@ -0,0 +1,32 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestGandivaBooleanLiteralNode < Test::Unit::TestCase + def setup + omit("Gandiva is required") unless defined?(::Gandiva) + @value = true + @node = Gandiva::BooleanLiteralNode.new(@value) + end + + def test_value + assert_equal(@value, @node.value?) + end + + def test_return_type + assert_equal(Arrow::BooleanDataType.new, @node.return_type) + end +end diff --git a/c_glib/test/gandiva/test-double-literal-node.rb b/c_glib/test/gandiva/test-double-literal-node.rb new file mode 100644 index 0000000000000..27cc3aea23b32 --- /dev/null +++ b/c_glib/test/gandiva/test-double-literal-node.rb @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
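The Gandiva literal-node tests in this patch all exercise the same contract: a literal node wraps a Ruby value and reports a fixed Arrow return type. A minimal sketch of that pattern, assuming the typelibs are loaded the way c_glib/test/run-test.rb does it:

    require "gi"
    Arrow = GI.load("Arrow")
    Gandiva = GI.load("Gandiva")

    # A literal node pairs a value with its Arrow data type.
    node = Gandiva::BooleanLiteralNode.new(true)
    node.value?       # => true (boolean getters map to "?" readers)
    node.return_type  # => Arrow::BooleanDataType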
+ +class TestGandivaDoubleLiteralNode < Test::Unit::TestCase + def setup + omit("Gandiva is required") unless defined?(::Gandiva) + @value = 1.5 + @node = Gandiva::DoubleLiteralNode.new(@value) + end + + def test_value + assert_equal(@value, @node.value) + end + + def test_return_type + assert_equal(Arrow::DoubleDataType.new, @node.return_type) + end +end diff --git a/c_glib/test/gandiva/test-field-node.rb b/c_glib/test/gandiva/test-field-node.rb index c5bfe6cfc9743..51db285bcc0bf 100644 --- a/c_glib/test/gandiva/test-field-node.rb +++ b/c_glib/test/gandiva/test-field-node.rb @@ -18,11 +18,15 @@ class TestGandivaFieldNode < Test::Unit::TestCase def setup omit("Gandiva is required") unless defined?(::Gandiva) + @field = Arrow::Field.new("valid", Arrow::BooleanDataType.new) + @node = Gandiva::FieldNode.new(@field) end def test_field - field = Arrow::Field.new("valid", Arrow::BooleanDataType.new) - field_node = Gandiva::FieldNode.new(field) - assert_equal(field, field_node.field) + assert_equal(@field, @node.field) + end + + def test_return_type + assert_equal(@field.data_type, @node.return_type) end end diff --git a/c_glib/test/gandiva/test-float-literal-node.rb b/c_glib/test/gandiva/test-float-literal-node.rb new file mode 100644 index 0000000000000..4a49eb37441d1 --- /dev/null +++ b/c_glib/test/gandiva/test-float-literal-node.rb @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestGandivaFloatLiteralNode < Test::Unit::TestCase + def setup + omit("Gandiva is required") unless defined?(::Gandiva) + @value = 1.5 + @node = Gandiva::FloatLiteralNode.new(@value) + end + + def test_value + assert_equal(@value, @node.value) + end + + def test_return_type + assert_equal(Arrow::FloatDataType.new, @node.return_type) + end +end diff --git a/c_glib/test/gandiva/test-if-node.rb b/c_glib/test/gandiva/test-if-node.rb new file mode 100644 index 0000000000000..b00359590905d --- /dev/null +++ b/c_glib/test/gandiva/test-if-node.rb @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
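The new return_type reader on Gandiva::FieldNode, exercised by the updated field-node test above, simply mirrors the wrapped field's data type:

    field = Arrow::Field.new("valid", Arrow::BooleanDataType.new)
    node = Gandiva::FieldNode.new(field)

    node.field        # => field
    node.return_type  # => field.data_type (Arrow::BooleanDataType)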
+ +class TestGandivaIfNode < Test::Unit::TestCase + def setup + omit("Gandiva is required") unless defined?(::Gandiva) + field1 = Arrow::Field.new("field1", Arrow::Int32DataType.new) + field2 = Arrow::Field.new("field2", Arrow::Int32DataType.new) + @then_node = Gandiva::FieldNode.new(field1) + @else_node = Gandiva::FieldNode.new(field2) + @return_type = Arrow::Int32DataType.new + @condition_node = Gandiva::FunctionNode.new("greater_than", + [@then_node, @else_node], + @return_type) + @if_node = Gandiva::IfNode.new(@condition_node, + @then_node, + @else_node, + @return_type) + end + + def test_readers + assert_equal([ + @condition_node, + @then_node, + @else_node, + @return_type + ], + [ + @if_node.condition_node, + @if_node.then_node, + @if_node.else_node, + @if_node.return_type + ]) + end +end diff --git a/c_glib/test/gandiva/test-int16-literal-node.rb b/c_glib/test/gandiva/test-int16-literal-node.rb new file mode 100644 index 0000000000000..f8e6b26849496 --- /dev/null +++ b/c_glib/test/gandiva/test-int16-literal-node.rb @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestGandivaInt16LiteralNode < Test::Unit::TestCase + def setup + omit("Gandiva is required") unless defined?(::Gandiva) + @value = -(2 ** 15) + @node = Gandiva::Int16LiteralNode.new(@value) + end + + def test_value + assert_equal(@value, @node.value) + end + + def test_return_type + assert_equal(Arrow::Int16DataType.new, @node.return_type) + end +end diff --git a/c_glib/test/gandiva/test-int32-literal-node.rb b/c_glib/test/gandiva/test-int32-literal-node.rb new file mode 100644 index 0000000000000..3d1bf588cf7dc --- /dev/null +++ b/c_glib/test/gandiva/test-int32-literal-node.rb @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
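The if-node test above shows how field, function, and if nodes compose into an expression tree. A condensed sketch of `if (a > b) then a else b`, under the assumption that a comparison function returns boolean (the test reuses Int32 as the condition's return type for brevity):

    int32 = Arrow::Int32DataType.new
    a = Gandiva::FieldNode.new(Arrow::Field.new("a", int32))
    b = Gandiva::FieldNode.new(Arrow::Field.new("b", int32))

    # if (a > b) then a else b
    condition = Gandiva::FunctionNode.new("greater_than",
                                          [a, b],
                                          Arrow::BooleanDataType.new)
    if_node = Gandiva::IfNode.new(condition, a, b, int32)
    if_node.return_type  # => int32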
+ +class TestGandivaInt32LiteralNode < Test::Unit::TestCase + def setup + omit("Gandiva is required") unless defined?(::Gandiva) + @value = -(2 ** 31) + @node = Gandiva::Int32LiteralNode.new(@value) + end + + def test_value + assert_equal(@value, @node.value) + end + + def test_return_type + assert_equal(Arrow::Int32DataType.new, @node.return_type) + end +end diff --git a/c_glib/test/gandiva/test-int64-literal-node.rb b/c_glib/test/gandiva/test-int64-literal-node.rb new file mode 100644 index 0000000000000..b2ca3bf630b43 --- /dev/null +++ b/c_glib/test/gandiva/test-int64-literal-node.rb @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestGandivaInt64LiteralNode < Test::Unit::TestCase + def setup + omit("Gandiva is required") unless defined?(::Gandiva) + @value = -(2 ** 63) + @node = Gandiva::Int64LiteralNode.new(@value) + end + + def test_value + assert_equal(@value, @node.value) + end + + def test_return_type + assert_equal(Arrow::Int64DataType.new, @node.return_type) + end +end diff --git a/c_glib/test/gandiva/test-int8-literal-node.rb b/c_glib/test/gandiva/test-int8-literal-node.rb new file mode 100644 index 0000000000000..8d917bd1b4dfe --- /dev/null +++ b/c_glib/test/gandiva/test-int8-literal-node.rb @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestGandivaInt8LiteralNode < Test::Unit::TestCase + def setup + omit("Gandiva is required") unless defined?(::Gandiva) + @value = -(2 ** 7) + @node = Gandiva::Int8LiteralNode.new(@value) + end + + def test_value + assert_equal(@value, @node.value) + end + + def test_return_type + assert_equal(Arrow::Int8DataType.new, @node.return_type) + end +end diff --git a/c_glib/test/gandiva/test-null-literal-node.rb b/c_glib/test/gandiva/test-null-literal-node.rb new file mode 100644 index 0000000000000..ae14f3c15e411 --- /dev/null +++ b/c_glib/test/gandiva/test-null-literal-node.rb @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestGandivaNullLiteralNode < Test::Unit::TestCase + def setup + omit("Gandiva is required") unless defined?(::Gandiva) + end + + def test_invalid_type + return_type = Arrow::NullDataType.new + message = + "[gandiva][null-literal-node][new] " + + "failed to create: <#{return_type}>" + assert_raise(Arrow::Error::Invalid.new(message)) do + Gandiva::NullLiteralNode.new(return_type) + end + end + + def test_return_type + return_type = Arrow::BooleanDataType.new + literal_node = Gandiva::NullLiteralNode.new(return_type) + assert_equal(return_type, literal_node.return_type) + end +end diff --git a/c_glib/test/gandiva/test-string-literal-node.rb b/c_glib/test/gandiva/test-string-literal-node.rb new file mode 100644 index 0000000000000..8a397ab4d1a9b --- /dev/null +++ b/c_glib/test/gandiva/test-string-literal-node.rb @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestGandivaStringLiteralNode < Test::Unit::TestCase + def setup + omit("Gandiva is required") unless defined?(::Gandiva) + @value = "Hello" + @node = Gandiva::StringLiteralNode.new(@value) + end + + def test_value + assert_equal(@value, @node.value) + end + + def test_return_type + assert_equal(Arrow::StringDataType.new, @node.return_type) + end +end diff --git a/c_glib/test/gandiva/test-uint16-literal-node.rb b/c_glib/test/gandiva/test-uint16-literal-node.rb new file mode 100644 index 0000000000000..971da38881df6 --- /dev/null +++ b/c_glib/test/gandiva/test-uint16-literal-node.rb @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
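The null-literal constructor tested above is the one literal that can fail: a typeless null has no Arrow representation, so the return type is validated. In short:

    # A concrete type works...
    node = Gandiva::NullLiteralNode.new(Arrow::BooleanDataType.new)
    node.return_type  # => Arrow::BooleanDataType

    # ...while Arrow::NullDataType raises Arrow::Error::Invalid.
    Gandiva::NullLiteralNode.new(Arrow::NullDataType.new)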
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestGandivaUInt16LiteralNode < Test::Unit::TestCase + def setup + omit("Gandiva is required") unless defined?(::Gandiva) + @value = 2 ** 16 - 1 + @node = Gandiva::UInt16LiteralNode.new(@value) + end + + def test_value + assert_equal(@value, @node.value) + end + + def test_return_type + assert_equal(Arrow::UInt16DataType.new, @node.return_type) + end +end diff --git a/c_glib/test/gandiva/test-uint32-literal-node.rb b/c_glib/test/gandiva/test-uint32-literal-node.rb new file mode 100644 index 0000000000000..8fcab7fefad87 --- /dev/null +++ b/c_glib/test/gandiva/test-uint32-literal-node.rb @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestGandivaUInt32LiteralNode < Test::Unit::TestCase + def setup + omit("Gandiva is required") unless defined?(::Gandiva) + @value = 2 ** 32 - 1 + @node = Gandiva::UInt32LiteralNode.new(@value) + end + + def test_value + assert_equal(@value, @node.value) + end + + def test_return_type + assert_equal(Arrow::UInt32DataType.new, @node.return_type) + end +end diff --git a/c_glib/test/gandiva/test-uint64-literal-node.rb b/c_glib/test/gandiva/test-uint64-literal-node.rb new file mode 100644 index 0000000000000..d5afddcd75f44 --- /dev/null +++ b/c_glib/test/gandiva/test-uint64-literal-node.rb @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
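One asymmetry worth noting: the UInt64 test below asserts on a small value (3) rather than the type's maximum, presumably because 2 ** 64 - 1 does not round-trip through the bindings' integer conversion; the other unsigned tests use their full range. The pattern is otherwise identical:

    node = Gandiva::UInt64LiteralNode.new(3)
    node.value        # => 3
    node.return_type  # => Arrow::UInt64DataType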
+ +class TestGandivaUInt64LiteralNode < Test::Unit::TestCase + def setup + omit("Gandiva is required") unless defined?(::Gandiva) + @value = 3 + @node = Gandiva::UInt64LiteralNode.new(@value) + end + + def test_value + assert_equal(@value, @node.value) + end + + def test_return_type + assert_equal(Arrow::UInt64DataType.new, @node.return_type) + end +end diff --git a/c_glib/test/gandiva/test-uint8-literal-node.rb b/c_glib/test/gandiva/test-uint8-literal-node.rb new file mode 100644 index 0000000000000..8ce91d599f435 --- /dev/null +++ b/c_glib/test/gandiva/test-uint8-literal-node.rb @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestGandivaUInt8LiteralNode < Test::Unit::TestCase + def setup + omit("Gandiva is required") unless defined?(::Gandiva) + @value = 2 ** 8 - 1 + @node = Gandiva::UInt8LiteralNode.new(@value) + end + + def test_value + assert_equal(@value, @node.value) + end + + def test_return_type + assert_equal(Arrow::UInt8DataType.new, @node.return_type) + end +end diff --git a/c_glib/test/helper/buildable.rb b/c_glib/test/helper/buildable.rb index d6d1ff89b6a3e..f3ae709512eeb 100644 --- a/c_glib/test/helper/buildable.rb +++ b/c_glib/test/helper/buildable.rb @@ -135,20 +135,20 @@ def append_to_builder(builder, value) data_type = builder.value_data_type case data_type when Arrow::ListDataType - builder.append + builder.append_value value_builder = builder.value_builder value.each do |v| append_to_builder(value_builder, v) end when Arrow::StructDataType - builder.append + builder.append_value value.each do |name, v| field_index = data_type.get_field_index(name) field_builder = builder.get_field_builder(field_index) append_to_builder(field_builder, v) end else - builder.append(value) + builder.append_value(value) end end end @@ -179,7 +179,7 @@ def build_array(builder, values) if value.nil? builder.append_null else - builder.append(value) + builder.append_value(value) end end builder.finish diff --git a/ci/travis_script_gandiva_cpp.sh b/c_glib/test/plasma/test-plasma-client-options.rb old mode 100755 new mode 100644 similarity index 69% rename from ci/travis_script_gandiva_cpp.sh rename to c_glib/test/plasma/test-plasma-client-options.rb index 4d0a9b7a6bac4..abe6fd3ce46ff --- a/ci/travis_script_gandiva_cpp.sh +++ b/c_glib/test/plasma/test-plasma-client-options.rb @@ -1,5 +1,3 @@ -#!/usr/bin/env bash - # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -17,17 +15,17 @@ # specific language governing permissions and limitations # under the License. 
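The Helper::Buildable changes above track the GLib builder API rename from append to append_value; every builder call in the updated tests follows suit. The new idiom:

    builder = Arrow::BooleanArrayBuilder.new
    builder.append_value(true)   # formerly builder.append(true)
    builder.append_null          # unchanged
    builder.append_value(false)
    array = builder.finish
    array.length  # => 3 (nulls count toward the length)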
-set -e - -source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh - -pushd $CPP_BUILD_DIR - -PATH=$ARROW_BUILD_TYPE:$PATH ctest -j2 --output-on-failure -L gandiva,unittest - -# not running in parallel, since some of them are benchmarks -PATH=$ARROW_BUILD_TYPE:$PATH ctest -VV -L gandiva,integ +class TestPlasmaClientOptions < Test::Unit::TestCase + include Helper::Omittable -popd + def setup + omit("Plasma is required") unless defined?(::Plasma) + @options = Plasma::ClientOptions.new + end -# TODO : Capture C++ coverage info + test("n_retries") do + assert_equal(-1, @options.n_retries) + @options.n_retries = 10 + assert_equal(10, @options.n_retries) + end +end diff --git a/c_glib/test/plasma/test-plasma-client.rb b/c_glib/test/plasma/test-plasma-client.rb index aee2d037b3104..a57d1fc5944e9 100644 --- a/c_glib/test/plasma/test-plasma-client.rb +++ b/c_glib/test/plasma/test-plasma-client.rb @@ -16,20 +16,80 @@ # under the License. class TestPlasmaClient < Test::Unit::TestCase + include Helper::Omittable + def setup @store = nil omit("Plasma is required") unless defined?(::Plasma) @store = Helper::PlasmaStore.new @store.start + @options = Plasma::ClientOptions.new + @client = Plasma::Client.new(@store.socket_path, @options) + @id = Plasma::ObjectID.new("Hello") + @data = "World" + @options = Plasma::ClientCreateOptions.new end def teardown @store.stop if @store end - def test_new - assert_nothing_raised do - Plasma::Client.new(@store.socket_path) + sub_test_case("#create") do + def setup + super + + @metadata = "Metadata" + end + + test("no options") do + require_gi(1, 42, 0) + + object = @client.create(@id, @data.bytesize) + object.data.set_data(0, @data) + object.seal + + object = @client.refer_object(@id, -1) + assert_equal(@data, object.data.data.to_s) + end + + test("options: metadata") do + @options.set_metadata(@metadata) + object = @client.create(@id, 1, @options) + object.seal + + object = @client.refer_object(@id, -1) + assert_equal(@metadata, object.metadata.data.to_s) + end + + test("options: GPU device") do + omit("Arrow CUDA is required") unless defined?(::ArrowCUDA) + + gpu_device = 0 + + @options.gpu_device = gpu_device + @options.metadata = @metadata + object = @client.create(@id, @data.bytesize, @options) + object.data.copy_from_host(@data) + object.seal + + object = @client.refer_object(@id, -1) + assert_equal([ + gpu_device, + @data, + @metadata, + ], + [ + object.gpu_device, + object.data.copy_to_host(0, @data.bytesize).to_s, + object.metadata.copy_to_host(0, @metadata.bytesize).to_s, + ]) + end + end + + test("#disconnect") do + @client.disconnect + assert_raise(Arrow::Error::Io) do + @client.create(@id, @data.bytesize, @options) end end end diff --git a/c_glib/test/plasma/test-plasma-created-object.rb b/c_glib/test/plasma/test-plasma-created-object.rb new file mode 100644 index 0000000000000..9025ff4ac22d9 --- /dev/null +++ b/c_glib/test/plasma/test-plasma-created-object.rb @@ -0,0 +1,56 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestPlasmaCreatedObject < Test::Unit::TestCase + def setup + @store = nil + omit("Plasma is required") unless defined?(::Plasma) + @store = Helper::PlasmaStore.new + @store.start + @client = Plasma::Client.new(@store.socket_path, nil) + + @id = Plasma::ObjectID.new("Hello") + @data = "World" + @metadata = "Metadata" + @options = Plasma::ClientCreateOptions.new + @options.metadata = @metadata + @object = @client.create(@id, @data.bytesize, @options) + end + + def teardown + @store.stop if @store + end + + test("#seal") do + @object.data.set_data(0, @data) + @object.seal + + object = @client.refer_object(@id, -1) + assert_equal(@data, object.data.data.to_s) + end + + test("#abort") do + @object.data.set_data(0, @data) + assert_raise(Arrow::Error::PlasmaObjectExists) do + @client.create(@id, @data.bytesize, @options) + end + @object.abort + + object = @client.create(@id, @data.bytesize, @options) + object.abort + end +end diff --git a/c_glib/test/plasma/test-plasma-referred-object.rb b/c_glib/test/plasma/test-plasma-referred-object.rb new file mode 100644 index 0000000000000..a74641ed5dcd3 --- /dev/null +++ b/c_glib/test/plasma/test-plasma-referred-object.rb @@ -0,0 +1,51 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
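The created-object test above and the referred-object test below are two halves of one lifecycle. A condensed sketch, assuming a plasma store is already listening on socket_path (Helper::PlasmaStore provides one in these tests):

    client = Plasma::Client.new(socket_path, nil)
    id = Plasma::ObjectID.new("Hello")

    options = Plasma::ClientCreateOptions.new
    options.metadata = "Metadata"

    # Producer: create, fill the data buffer, then seal to publish.
    object = client.create(id, "World".bytesize, options)
    object.data.set_data(0, "World")
    object.seal

    # Consumer: refer to the sealed object, read, then release.
    object = client.refer_object(id, -1)
    object.data.data.to_s  # => "World"
    object.release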
+ +class TestPlasmaReferredObject < Test::Unit::TestCase + def setup + @store = nil + omit("Plasma is required") unless defined?(::Plasma) + @store = Helper::PlasmaStore.new + @store.start + @client = Plasma::Client.new(@store.socket_path, nil) + + @id = Plasma::ObjectID.new("Hello") + @data = "World" + @metadata = "Metadata" + @options = Plasma::ClientCreateOptions.new + @options.metadata = @metadata + object = @client.create(@id, @data.bytesize, @options) + object.data.set_data(0, @data) + object.seal + @object = @client.refer_object(@id, -1) + end + + def teardown + @store.stop if @store + end + + test("#release") do + @object.release + + message = "[plasma][referred-object][release]: " + message << "Can't process released object: <#{@id.to_hex}>" + error = Arrow::Error::Invalid.new(message) + assert_raise(error) do + @object.release + end + end +end diff --git a/c_glib/test/run-test.rb b/c_glib/test/run-test.rb index 238bb2d68af70..99d72f4289176 100755 --- a/c_glib/test/run-test.rb +++ b/c_glib/test/run-test.rb @@ -38,7 +38,7 @@ def initialize(data) end begin - ArrowGPU = GI.load("ArrowGPU") + ArrowCUDA = GI.load("ArrowCUDA") rescue GObjectIntrospection::RepositoryError::TypelibNotFound end diff --git a/c_glib/test/run-test.sh b/c_glib/test/run-test.sh index 96585ce653a74..d33555dd459e2 100755 --- a/c_glib/test/run-test.sh +++ b/c_glib/test/run-test.sh @@ -20,7 +20,7 @@ test_dir="$(cd $(dirname $0); pwd)" build_dir="$(cd .; pwd)" -modules="arrow-glib arrow-gpu-glib gandiva-glib parquet-glib plasma-glib" +modules="arrow-glib arrow-cuda-glib gandiva-glib parquet-glib plasma-glib" for module in ${modules}; do module_build_dir="${build_dir}/${module}" diff --git a/c_glib/test/test-array.rb b/c_glib/test/test-array.rb index 12fba7346c36f..3befde3c7a9bb 100644 --- a/c_glib/test/test-array.rb +++ b/c_glib/test/test-array.rb @@ -42,7 +42,7 @@ def test_equal_range def test_is_null builder = Arrow::BooleanArrayBuilder.new builder.append_null - builder.append(true) + builder.append_value(true) array = builder.finish assert_equal([true, false], array.length.times.collect {|i| array.null?(i)}) @@ -51,7 +51,7 @@ def test_is_null def test_is_valid builder = Arrow::BooleanArrayBuilder.new builder.append_null - builder.append(true) + builder.append_value(true) array = builder.finish assert_equal([false, true], array.length.times.collect {|i| array.valid?(i)}) @@ -59,7 +59,7 @@ def test_is_valid def test_length builder = Arrow::BooleanArrayBuilder.new - builder.append(true) + builder.append_value(true) array = builder.finish assert_equal(1, array.length) end @@ -75,10 +75,10 @@ def test_n_nulls def test_null_bitmap builder = Arrow::BooleanArrayBuilder.new builder.append_null - builder.append(true) - builder.append(false) + builder.append_value(true) + builder.append_value(false) builder.append_null - builder.append(false) + builder.append_value(false) array = builder.finish assert_equal(0b10110, array.null_bitmap.data.to_s.unpack("c*")[0]) end @@ -97,9 +97,9 @@ def test_value_type def test_slice builder = Arrow::BooleanArrayBuilder.new - builder.append(true) - builder.append(false) - builder.append(true) + builder.append_value(true) + builder.append_value(false) + builder.append_value(true) array = builder.finish sub_array = array.slice(1, 2) assert_equal([false, true], diff --git a/c_glib/test/test-binary-array.rb b/c_glib/test/test-binary-array.rb index 2dfd9cfbaaf14..0dcaf4eef60c5 100644 --- a/c_glib/test/test-binary-array.rb +++ b/c_glib/test/test-binary-array.rb @@ -32,7 +32,7 @@ def test_new def 
test_value data = "\x00\x01\x02" builder = Arrow::BinaryArrayBuilder.new - builder.append(data) + builder.append_value(data) array = builder.finish assert_equal(data, array.get_value(0).to_s) end @@ -41,8 +41,8 @@ def test_buffer data1 = "\x00\x01\x02" data2 = "\x03\x04\x05" builder = Arrow::BinaryArrayBuilder.new - builder.append(data1) - builder.append(data2) + builder.append_value(data1) + builder.append_value(data2) array = builder.finish assert_equal(data1 + data2, array.buffer.data.to_s) end @@ -51,8 +51,8 @@ def test_offsets_buffer data1 = "\x00\x01" data2 = "\x02\x03\x04" builder = Arrow::BinaryArrayBuilder.new - builder.append(data1) - builder.append(data2) + builder.append_value(data1) + builder.append_value(data2) array = builder.finish byte_per_offset = 4 assert_equal([0, 2, 5].pack("l*"), diff --git a/c_glib/test/test-boolean-array.rb b/c_glib/test/test-boolean-array.rb index ae22bce48b64a..e8c7e5efe2fc5 100644 --- a/c_glib/test/test-boolean-array.rb +++ b/c_glib/test/test-boolean-array.rb @@ -29,26 +29,26 @@ def test_new def test_buffer builder = Arrow::BooleanArrayBuilder.new - builder.append(true) - builder.append(false) - builder.append(true) + builder.append_value(true) + builder.append_value(false) + builder.append_value(true) array = builder.finish assert_equal([0b101].pack("C*"), array.buffer.data.to_s) end def test_value builder = Arrow::BooleanArrayBuilder.new - builder.append(true) + builder.append_value(true) array = builder.finish assert_equal(true, array.get_value(0)) end def test_values - require_gi_bindings(3, 1, 9) + require_gi_bindings(3, 3, 1) builder = Arrow::BooleanArrayBuilder.new - builder.append(true) - builder.append(false) - builder.append(true) + builder.append_value(true) + builder.append_value(false) + builder.append_value(true) array = builder.finish assert_equal([true, false, true], array.values) end diff --git a/c_glib/test/test-buffer-input-stream.rb b/c_glib/test/test-buffer-input-stream.rb index f5a0132d2da98..cb6a667b3b7c0 100644 --- a/c_glib/test/test-buffer-input-stream.rb +++ b/c_glib/test/test-buffer-input-stream.rb @@ -39,4 +39,12 @@ def test_align read_buffer = buffer_input_stream.read(3) assert_equal("rld", read_buffer.data.to_s) end + + def test_peek + buffer = Arrow::Buffer.new("Hello World") + buffer_input_stream = Arrow::BufferInputStream.new(buffer) + peeked_data = buffer_input_stream.peek(5) + assert_equal(buffer_input_stream.read(5).data.to_s, + peeked_data.to_s) + end end diff --git a/c_glib/test/test-gpu-cuda.rb b/c_glib/test/test-cuda.rb similarity index 80% rename from c_glib/test/test-gpu-cuda.rb rename to c_glib/test/test-cuda.rb index 66ec19d424ec9..ae915307b70f0 100644 --- a/c_glib/test/test-gpu-cuda.rb +++ b/c_glib/test/test-cuda.rb @@ -15,12 +15,12 @@ # specific language governing permissions and limitations # under the License. -class TestGPUCUDA < Test::Unit::TestCase +class TestCUDA < Test::Unit::TestCase include Helper::Buildable def setup - omit("Arrow GPU is required") unless defined?(::ArrowGPU) - @manager = ArrowGPU::CUDADeviceManager.new + omit("Arrow CUDA is required") unless defined?(::ArrowCUDA) + @manager = ArrowCUDA::DeviceManager.new omit("At least one GPU is required") if @manager.n_devices.zero? 
@context = @manager.get_context(0) end @@ -29,7 +29,7 @@ def setup def test_allocated_size allocated_size_before = @context.allocated_size size = 128 - buffer = ArrowGPU::CUDABuffer.new(@context, size) + buffer = ArrowCUDA::Buffer.new(@context, size) assert_equal(size, @context.allocated_size - allocated_size_before) end @@ -38,7 +38,7 @@ def test_allocated_size sub_test_case("Buffer") do def setup super - @buffer = ArrowGPU::CUDABuffer.new(@context, 128) + @buffer = ArrowCUDA::Buffer.new(@context, 128) end def test_copy @@ -50,19 +50,19 @@ def test_export @buffer.copy_from_host("Hello World") handle = @buffer.export serialized_handle = handle.serialize.data - Tempfile.open("arrow-gpu-cuda-export") do |output| + Tempfile.open("arrow-cuda-export") do |output| pid = spawn(RbConfig.ruby, "-e", <<-SCRIPT) require "gi" Gio = GI.load("Gio") Arrow = GI.load("Arrow") -ArrowGPU = GI.load("ArrowGPU") +ArrowCUDA = GI.load("ArrowCUDA") -manager = ArrowGPU::CUDADeviceManager.new +manager = ArrowCUDA::DeviceManager.new context = manager.get_context(0) serialized_handle = #{serialized_handle.to_s.dump} -handle = ArrowGPU::CUDAIPCMemoryHandle.new(serialized_handle) -buffer = ArrowGPU::CUDABuffer.new(context, handle) +handle = ArrowCUDA::IPCMemoryHandle.new(serialized_handle) +buffer = ArrowCUDA::Buffer.new(context, handle) File.open(#{output.path.dump}, "w") do |output| output.print(buffer.copy_to_host(0, 6).to_s) end @@ -85,7 +85,7 @@ def test_record_batch ] cpu_record_batch = Arrow::RecordBatch.new(schema, 1, columns) - buffer = ArrowGPU::CUDABuffer.new(@context, cpu_record_batch) + buffer = ArrowCUDA::Buffer.new(@context, cpu_record_batch) gpu_record_batch = buffer.read_record_batch(schema) assert_equal(cpu_record_batch.n_rows, gpu_record_batch.n_rows) @@ -94,16 +94,16 @@ def test_record_batch sub_test_case("HostBuffer") do def test_new - buffer = ArrowGPU::CUDAHostBuffer.new(0, 128) + buffer = ArrowCUDA::HostBuffer.new(0, 128) assert_equal(128, buffer.size) end end sub_test_case("BufferInputStream") do def test_new - buffer = ArrowGPU::CUDABuffer.new(@context, 128) + buffer = ArrowCUDA::Buffer.new(@context, 128) buffer.copy_from_host("Hello World") - stream = ArrowGPU::CUDABufferInputStream.new(buffer) + stream = ArrowCUDA::BufferInputStream.new(buffer) begin assert_equal("Hello Worl", stream.read(5).copy_to_host(0, 10).to_s) ensure @@ -115,9 +115,9 @@ def test_new sub_test_case("BufferOutputStream") do def setup super - @buffer = ArrowGPU::CUDABuffer.new(@context, 128) + @buffer = ArrowCUDA::Buffer.new(@context, 128) @buffer.copy_from_host("\x00" * @buffer.size) - @stream = ArrowGPU::CUDABufferOutputStream.new(@buffer) + @stream = ArrowCUDA::BufferOutputStream.new(@buffer) end def cleanup diff --git a/c_glib/test/test-date32-array.rb b/c_glib/test/test-date32-array.rb index f1425693f381e..09ef78650bd59 100644 --- a/c_glib/test/test-date32-array.rb +++ b/c_glib/test/test-date32-array.rb @@ -34,9 +34,9 @@ def test_buffer after_epoch = 17406 # 2017-08-28 builder = Arrow::Date32ArrayBuilder.new - builder.append(0) - builder.append(after_epoch) - builder.append(before_epoch) + builder.append_value(0) + builder.append_value(after_epoch) + builder.append_value(before_epoch) array = builder.finish assert_equal([0, after_epoch, before_epoch].pack("l*"), array.buffer.data.to_s) @@ -46,7 +46,7 @@ def test_value after_epoch = 17406 # 2017-08-28 builder = Arrow::Date32ArrayBuilder.new - builder.append(after_epoch) + builder.append_value(after_epoch) array = builder.finish assert_equal(after_epoch, 
array.get_value(0)) end @@ -56,9 +56,9 @@ def test_values after_epoch = 17406 # 2017-08-28 builder = Arrow::Date32ArrayBuilder.new - builder.append(0) - builder.append(after_epoch) - builder.append(before_epoch) + builder.append_value(0) + builder.append_value(after_epoch) + builder.append_value(before_epoch) array = builder.finish assert_equal([0, after_epoch, before_epoch], array.values) end diff --git a/c_glib/test/test-date64-array.rb b/c_glib/test/test-date64-array.rb index 1ea9f5a6a0545..4d9f189196fc8 100644 --- a/c_glib/test/test-date64-array.rb +++ b/c_glib/test/test-date64-array.rb @@ -34,9 +34,9 @@ def test_buffer after_epoch = 1503878400000 # 2017-08-28T00:00:00Z builder = Arrow::Date64ArrayBuilder.new - builder.append(0) - builder.append(after_epoch) - builder.append(before_epoch) + builder.append_value(0) + builder.append_value(after_epoch) + builder.append_value(before_epoch) array = builder.finish assert_equal([0, after_epoch, before_epoch].pack("q*"), array.buffer.data.to_s) @@ -46,7 +46,7 @@ def test_value after_epoch = 1503878400000 # 2017-08-28T00:00:00Z builder = Arrow::Date64ArrayBuilder.new - builder.append(after_epoch) + builder.append_value(after_epoch) array = builder.finish assert_equal(after_epoch, array.get_value(0)) end @@ -56,9 +56,9 @@ def test_values after_epoch = 1503878400000 # 2017-08-28T00:00:00Z builder = Arrow::Date64ArrayBuilder.new - builder.append(0) - builder.append(after_epoch) - builder.append(before_epoch) + builder.append_value(0) + builder.append_value(after_epoch) + builder.append_value(before_epoch) array = builder.finish assert_equal([0, after_epoch, before_epoch], array.values) end diff --git a/c_glib/test/test-decimal-array.rb b/c_glib/test/test-decimal128-array.rb similarity index 85% rename from c_glib/test/test-decimal-array.rb rename to c_glib/test/test-decimal128-array.rb index a65e10037659a..132ceb7788585 100644 --- a/c_glib/test/test-decimal-array.rb +++ b/c_glib/test/test-decimal128-array.rb @@ -15,21 +15,21 @@ # specific language governing permissions and limitations # under the License. -class TestDecimalArray < Test::Unit::TestCase +class TestDecimal128Array < Test::Unit::TestCase def test_format_value - data_type = Arrow::DecimalDataType.new(8,2) + data_type = Arrow::Decimal128DataType.new(8, 2) builder = Arrow::Decimal128ArrayBuilder.new(data_type) decimal = Arrow::Decimal128.new("23423445") - builder.append(decimal) + builder.append_value(decimal) array = builder.finish assert_equal("234234.45", array.format_value(0)) end def test_value - data_type = Arrow::DecimalDataType.new(8,2) + data_type = Arrow::Decimal128DataType.new(8, 2) builder = Arrow::Decimal128ArrayBuilder.new(data_type) decimal = Arrow::Decimal128.new("23423445") - builder.append(decimal) + builder.append_value(decimal) array = builder.finish assert_equal("234234.45", array.get_value(0).to_string_scale(array.value_data_type.scale)) diff --git a/c_glib/test/test-decimal-data-type.rb b/c_glib/test/test-decimal128-data-type.rb similarity index 80% rename from c_glib/test/test-decimal-data-type.rb rename to c_glib/test/test-decimal128-data-type.rb index 04bfe78f925c0..27a31e28309cd 100644 --- a/c_glib/test/test-decimal-data-type.rb +++ b/c_glib/test/test-decimal128-data-type.rb @@ -15,24 +15,24 @@ # specific language governing permissions and limitations # under the License. 
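The Decimal* classes are renamed to Decimal128* in this hunk; building an array is unchanged apart from the names and the append_value spelling:

    data_type = Arrow::Decimal128DataType.new(8, 2)  # precision 8, scale 2
    builder = Arrow::Decimal128ArrayBuilder.new(data_type)
    builder.append_value(Arrow::Decimal128.new("23423445"))
    array = builder.finish
    array.format_value(0)  # => "234234.45"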
-class TestDecimalDataType < Test::Unit::TestCase +class TestDecimal128DataType < Test::Unit::TestCase def test_type - data_type = Arrow::DecimalDataType.new(2, 0) + data_type = Arrow::Decimal128DataType.new(2, 0) assert_equal(Arrow::Type::DECIMAL, data_type.id) end def test_to_s - data_type = Arrow::DecimalDataType.new(2, 0) + data_type = Arrow::Decimal128DataType.new(2, 0) assert_equal("decimal(2, 0)", data_type.to_s) end def test_precision - data_type = Arrow::DecimalDataType.new(8, 2) + data_type = Arrow::Decimal128DataType.new(8, 2) assert_equal(8, data_type.precision) end def test_scale - data_type = Arrow::DecimalDataType.new(8, 2) + data_type = Arrow::Decimal128DataType.new(8, 2) assert_equal(2, data_type.scale) end end diff --git a/c_glib/test/test-decimal.rb b/c_glib/test/test-decimal128.rb similarity index 56% rename from c_glib/test/test-decimal.rb rename to c_glib/test/test-decimal128.rb index 99f1912babfae..de9453cbe69cd 100644 --- a/c_glib/test/test-decimal.rb +++ b/c_glib/test/test-decimal128.rb @@ -106,4 +106,101 @@ def test_divide_zero decimal1.divide(decimal2) end end + + def test_equal + decimal = Arrow::Decimal128.new(10) + other_decimal1 = Arrow::Decimal128.new(10) + other_decimal2 = Arrow::Decimal128.new(11) + assert_equal([ + true, + false, + ], + [ + decimal == other_decimal1, + decimal == other_decimal2, + ]) + end + + def test_not_equal + require_gi_bindings(3, 3, 1) + decimal = Arrow::Decimal128.new(10) + other_decimal1 = Arrow::Decimal128.new(10) + other_decimal2 = Arrow::Decimal128.new(11) + assert_equal([ + false, + true, + ], + [ + decimal != other_decimal1, + decimal != other_decimal2, + ]) + end + + def test_less_than + require_gi_bindings(3, 3, 1) + decimal = Arrow::Decimal128.new(10) + other_decimal1 = Arrow::Decimal128.new(11) + other_decimal2 = Arrow::Decimal128.new(9) + assert_equal([ + true, + false, + false + ], + [ + decimal < other_decimal1, + decimal < other_decimal2, + decimal < decimal, + ]) + end + + def test_less_than_or_equal + require_gi_bindings(3, 3, 1) + decimal = Arrow::Decimal128.new(10) + other_decimal1 = Arrow::Decimal128.new(11) + other_decimal2 = Arrow::Decimal128.new(9) + assert_equal([ + true, + false, + true + ], + [ + decimal <= other_decimal1, + decimal <= other_decimal2, + decimal <= decimal + ]) + end + + def test_greater_than + require_gi_bindings(3, 3, 1) + decimal = Arrow::Decimal128.new(10) + other_decimal1 = Arrow::Decimal128.new(11) + other_decimal2 = Arrow::Decimal128.new(9) + assert_equal([ + false, + true, + false + ], + [ + decimal > other_decimal1, + decimal > other_decimal2, + decimal > decimal + ]) + end + + def test_greater_than_or_equal + require_gi_bindings(3, 3, 1) + decimal = Arrow::Decimal128.new(10) + other_decimal1 = Arrow::Decimal128.new(11) + other_decimal2 = Arrow::Decimal128.new(9) + assert_equal([ + false, + true, + true + ], + [ + decimal >= other_decimal1, + decimal >= other_decimal2, + decimal >= decimal + ]) + end end diff --git a/c_glib/test/test-dense-union-array.rb b/c_glib/test/test-dense-union-array.rb new file mode 100644 index 0000000000000..fa73f8d4c0918 --- /dev/null +++ b/c_glib/test/test-dense-union-array.rb @@ -0,0 +1,50 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDenseUnionArray < Test::Unit::TestCase + include Helper::Buildable + + def setup + type_ids = build_int8_array([0, 1, nil, 1, 1]) + value_offsets = build_int32_array([0, 0, 0, 1, 2]) + fields = [ + build_int16_array([1]), + build_string_array(["a", "b", "c"]), + ] + @array = Arrow::DenseUnionArray.new(type_ids, value_offsets, fields) + end + + def test_value_data_type + fields = [ + Arrow::Field.new("0", Arrow::Int16DataType.new), + Arrow::Field.new("1", Arrow::StringDataType.new), + ] + assert_equal(Arrow::DenseUnionDataType.new(fields, [0, 1]), + @array.value_data_type) + end + + def test_field + assert_equal([ + build_int16_array([1]), + build_string_array(["a", "b", "c"]), + ], + [ + @array.get_field(0), + @array.get_field(1), + ]) + end +end diff --git a/c_glib/test/test-dense-union-data-type.rb b/c_glib/test/test-dense-union-data-type.rb new file mode 100644 index 0000000000000..231767f8a5441 --- /dev/null +++ b/c_glib/test/test-dense-union-data-type.rb @@ -0,0 +1,60 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
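The new TestDenseUnionArray above pins down the dense-union constructor's argument order. A condensed sketch of the same call (the build_* helpers come from the tests' Helper::Buildable module, not from Arrow itself):

    # type_ids selects the child for each slot (0 => int16, 1 => string,
    # nil => null); value_offsets indexes into that child's values.
    type_ids      = build_int8_array([0, 1, nil, 1, 1])
    value_offsets = build_int32_array([0, 0, 0, 1, 2])
    children = [
      build_int16_array([1]),
      build_string_array(["a", "b", "c"]),
    ]
    array = Arrow::DenseUnionArray.new(type_ids, value_offsets, children)
    array.get_field(1)  # => the string child array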
+ +class TestDenseUnionDataType < Test::Unit::TestCase + def setup + @number_field_data_type = Arrow::Int32DataType.new + @text_field_data_type = Arrow::StringDataType.new + @field_data_types = [ + @number_field_data_type, + @text_field_data_type, + ] + @number_field = Arrow::Field.new("number", @number_field_data_type) + @text_field = Arrow::Field.new("text", @text_field_data_type) + @fields = [ + @number_field, + @text_field, + ] + @data_type = Arrow::DenseUnionDataType.new(@fields, [2, 9]) + end + + def test_type + assert_equal(Arrow::Type::UNION, @data_type.id) + end + + def test_to_s + assert_equal("union[dense]", + @data_type.to_s) + end + + def test_fields + assert_equal(@fields.zip(@field_data_types), + @data_type.fields.collect {|field| [field, field.data_type]}) + end + + def test_get_field + field = @data_type.get_field(0) + assert_equal([ + @fields[0], + @field_data_types[0], + ], + [ + field, + field.data_type, + ]) + end +end diff --git a/c_glib/test/test-double-array.rb b/c_glib/test/test-double-array.rb index 1213a5dfe53d6..020ed8f079960 100644 --- a/c_glib/test/test-double-array.rb +++ b/c_glib/test/test-double-array.rb @@ -29,16 +29,16 @@ def test_new def test_buffer builder = Arrow::DoubleArrayBuilder.new - builder.append(-1.1) - builder.append(2.2) - builder.append(-4.4) + builder.append_value(-1.1) + builder.append_value(2.2) + builder.append_value(-4.4) array = builder.finish assert_equal([-1.1, 2.2, -4.4].pack("d*"), array.buffer.data.to_s) end def test_value builder = Arrow::DoubleArrayBuilder.new - builder.append(1.5) + builder.append_value(1.5) array = builder.finish assert_in_delta(1.5, array.get_value(0)) end @@ -46,9 +46,9 @@ def test_value def test_values require_gi_bindings(3, 1, 7) builder = Arrow::DoubleArrayBuilder.new - builder.append(1.5) - builder.append(3) - builder.append(4.5) + builder.append_value(1.5) + builder.append_value(3) + builder.append_value(4.5) array = builder.finish assert_equal([1.5, 3.0, 4.5], array.values) end diff --git a/c_glib/test/test-fixed-size-binary-data-type.rb b/c_glib/test/test-fixed-size-binary-data-type.rb new file mode 100644 index 0000000000000..584fb3deec93d --- /dev/null +++ b/c_glib/test/test-fixed-size-binary-data-type.rb @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
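The union data-type tests just above also show that the constructors take explicit type codes alongside the field list. A short illustrative sketch, reusing the fields from that setup:

    fields = [
      Arrow::Field.new("number", Arrow::Int32DataType.new),
      Arrow::Field.new("text", Arrow::StringDataType.new),
    ]
    # The second argument assigns one type code per field; as the test's
    # [2, 9] pair shows, the codes need not be consecutive.
    data_type = Arrow::DenseUnionDataType.new(fields, [2, 9])
    data_type.id    # => Arrow::Type::UNION
    data_type.to_s  # => "union[dense]"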
+ +class TestFixedSizeBinaryDataType < Test::Unit::TestCase + def setup + @byte_width = 10 + @data_type = Arrow::FixedSizeBinaryDataType.new(@byte_width) + end + + def test_type + assert_equal(Arrow::Type::FIXED_SIZE_BINARY, @data_type.id) + end + + def test_to_s + assert_equal("fixed_size_binary[10]", @data_type.to_s) + end + + def test_byte_width + assert_equal(@byte_width, @data_type.byte_width) + end + + def test_bit_width + assert_equal(@byte_width * 8, @data_type.bit_width) + end +end diff --git a/c_glib/test/test-float-array.rb b/c_glib/test/test-float-array.rb index c8e1b4d864c08..c2a71a0dd39db 100644 --- a/c_glib/test/test-float-array.rb +++ b/c_glib/test/test-float-array.rb @@ -29,16 +29,16 @@ def test_new def test_buffer builder = Arrow::FloatArrayBuilder.new - builder.append(-1.1) - builder.append(2.2) - builder.append(-4.4) + builder.append_value(-1.1) + builder.append_value(2.2) + builder.append_value(-4.4) array = builder.finish assert_equal([-1.1, 2.2, -4.4].pack("f*"), array.buffer.data.to_s) end def test_value builder = Arrow::FloatArrayBuilder.new - builder.append(1.5) + builder.append_value(1.5) array = builder.finish assert_in_delta(1.5, array.get_value(0)) end @@ -46,9 +46,9 @@ def test_value def test_values require_gi_bindings(3, 1, 7) builder = Arrow::FloatArrayBuilder.new - builder.append(1.5) - builder.append(3) - builder.append(4.5) + builder.append_value(1.5) + builder.append_value(3) + builder.append_value(4.5) array = builder.finish assert_equal([1.5, 3.0, 4.5], array.values) end diff --git a/c_glib/test/test-int16-array.rb b/c_glib/test/test-int16-array.rb index 13646e0d5b818..e0efb68019b24 100644 --- a/c_glib/test/test-int16-array.rb +++ b/c_glib/test/test-int16-array.rb @@ -29,16 +29,16 @@ def test_new def test_buffer builder = Arrow::Int16ArrayBuilder.new - builder.append(-1) - builder.append(2) - builder.append(-4) + builder.append_value(-1) + builder.append_value(2) + builder.append_value(-4) array = builder.finish assert_equal([-1, 2, -4].pack("s*"), array.buffer.data.to_s) end def test_value builder = Arrow::Int16ArrayBuilder.new - builder.append(-1) + builder.append_value(-1) array = builder.finish assert_equal(-1, array.get_value(0)) end @@ -46,9 +46,9 @@ def test_value def test_values require_gi_bindings(3, 1, 7) builder = Arrow::Int16ArrayBuilder.new - builder.append(-1) - builder.append(2) - builder.append(-4) + builder.append_value(-1) + builder.append_value(2) + builder.append_value(-4) array = builder.finish assert_equal([-1, 2, -4], array.values) end diff --git a/c_glib/test/test-int32-array.rb b/c_glib/test/test-int32-array.rb index d1579a8eba881..9827e532bf154 100644 --- a/c_glib/test/test-int32-array.rb +++ b/c_glib/test/test-int32-array.rb @@ -28,25 +28,25 @@ def test_new def test_buffer builder = Arrow::Int32ArrayBuilder.new - builder.append(-1) - builder.append(2) - builder.append(-4) + builder.append_value(-1) + builder.append_value(2) + builder.append_value(-4) array = builder.finish assert_equal([-1, 2, -4].pack("l*"), array.buffer.data.to_s) end def test_value builder = Arrow::Int32ArrayBuilder.new - builder.append(-1) + builder.append_value(-1) array = builder.finish assert_equal(-1, array.get_value(0)) end def test_values builder = Arrow::Int32ArrayBuilder.new - builder.append(-1) - builder.append(2) - builder.append(-4) + builder.append_value(-1) + builder.append_value(2) + builder.append_value(-4) array = builder.finish assert_equal([-1, 2, -4], array.values) end diff --git a/c_glib/test/test-int64-array.rb 
b/c_glib/test/test-int64-array.rb index 5d9c37a55c084..39a74d34e23fa 100644 --- a/c_glib/test/test-int64-array.rb +++ b/c_glib/test/test-int64-array.rb @@ -28,25 +28,25 @@ def test_new def test_buffer builder = Arrow::Int64ArrayBuilder.new - builder.append(-1) - builder.append(2) - builder.append(-4) + builder.append_value(-1) + builder.append_value(2) + builder.append_value(-4) array = builder.finish assert_equal([-1, 2, -4].pack("q*"), array.buffer.data.to_s) end def test_value builder = Arrow::Int64ArrayBuilder.new - builder.append(-1) + builder.append_value(-1) array = builder.finish assert_equal(-1, array.get_value(0)) end def test_values builder = Arrow::Int64ArrayBuilder.new - builder.append(-1) - builder.append(2) - builder.append(-4) + builder.append_value(-1) + builder.append_value(2) + builder.append_value(-4) array = builder.finish assert_equal([-1, 2, -4], array.values) end diff --git a/c_glib/test/test-int8-array.rb b/c_glib/test/test-int8-array.rb index e17c10c53611e..46fe591a575c2 100644 --- a/c_glib/test/test-int8-array.rb +++ b/c_glib/test/test-int8-array.rb @@ -28,25 +28,25 @@ def test_new def test_buffer builder = Arrow::Int8ArrayBuilder.new - builder.append(-1) - builder.append(2) - builder.append(-4) + builder.append_value(-1) + builder.append_value(2) + builder.append_value(-4) array = builder.finish assert_equal([-1, 2, -4].pack("c*"), array.buffer.data.to_s) end def test_value builder = Arrow::Int8ArrayBuilder.new - builder.append(-1) + builder.append_value(-1) array = builder.finish assert_equal(-1, array.get_value(0)) end def test_values builder = Arrow::Int8ArrayBuilder.new - builder.append(-1) - builder.append(2) - builder.append(-4) + builder.append_value(-1) + builder.append_value(2) + builder.append_value(-4) array = builder.finish assert_equal([-1, 2, -4], array.values) end diff --git a/c_glib/test/test-list-array.rb b/c_glib/test/test-list-array.rb index 14f84067ac525..271d32236acbd 100644 --- a/c_glib/test/test-list-array.rb +++ b/c_glib/test/test-list-array.rb @@ -38,14 +38,14 @@ def test_value builder = Arrow::ListArrayBuilder.new(data_type) value_builder = builder.value_builder - builder.append - value_builder.append(-29) - value_builder.append(29) + builder.append_value + value_builder.append_value(-29) + value_builder.append_value(29) - builder.append - value_builder.append(-1) - value_builder.append(0) - value_builder.append(1) + builder.append_value + value_builder.append_value(-1) + value_builder.append_value(0) + value_builder.append_value(1) array = builder.finish value = array.get_value(1) diff --git a/c_glib/test/test-list-data-type.rb b/c_glib/test/test-list-data-type.rb index aa6a8fa65fd8c..78df28a144aa3 100644 --- a/c_glib/test/test-list-data-type.rb +++ b/c_glib/test/test-list-data-type.rb @@ -16,21 +16,28 @@ # under the License. 
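The list-array hunk above shows the same rename applied to nested builders: the outer builder's `append_value` opens a new list slot, and elements go through `value_builder`. A compact sketch of that pattern, assuming an int8-valued list type like the one the test appears to use:

    field = Arrow::Field.new("value", Arrow::Int8DataType.new)
    builder = Arrow::ListArrayBuilder.new(Arrow::ListDataType.new(field))
    value_builder = builder.value_builder

    builder.append_value             # opens the list [-29, 29]
    value_builder.append_value(-29)
    value_builder.append_value(29)

    builder.append_value             # opens the list [-1, 0, 1]
    value_builder.append_value(-1)
    value_builder.append_value(0)
    value_builder.append_value(1)

    array = builder.finish
    array.get_value(1)               # => the second list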
class TestListDataType < Test::Unit::TestCase + def setup + @field_data_type = Arrow::BooleanDataType.new + @field = Arrow::Field.new("enabled", @field_data_type) + @data_type = Arrow::ListDataType.new(@field) + end + def test_type - field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) - data_type = Arrow::ListDataType.new(field) - assert_equal(Arrow::Type::LIST, data_type.id) + assert_equal(Arrow::Type::LIST, @data_type.id) end def test_to_s - field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) - data_type = Arrow::ListDataType.new(field) - assert_equal("list", data_type.to_s) + assert_equal("list", @data_type.to_s) end - def test_value_field - field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) - data_type = Arrow::ListDataType.new(field) - assert_equal(field, data_type.value_field) + def test_field + assert_equal([ + @field, + @field_data_type, + ], + [ + @data_type.field, + @data_type.field.data_type, + ]) end end diff --git a/c_glib/test/test-record-batch-builder.rb b/c_glib/test/test-record-batch-builder.rb index 1bb72820a5860..ce8efdffd98d8 100644 --- a/c_glib/test/test-record-batch-builder.rb +++ b/c_glib/test/test-record-batch-builder.rb @@ -17,6 +17,7 @@ class TestRecordBatchBuilder < Test::Unit::TestCase include Helper::Buildable + include Helper::Omittable def setup @fields = [ @@ -36,37 +37,38 @@ def test_schema assert_equal(@schema, @builder.schema) end - def test_n_fields - assert_equal(@fields.size, @builder.n_fields) + def test_n_columns + assert_equal(@fields.size, @builder.n_columns) end - sub_test_case("#get_field") do + sub_test_case("#get_column_builder") do def test_valid assert_equal(Arrow::BooleanArrayBuilder, - @builder.get_field(0).class) + @builder.get_column_builder(0).class) end def test_negative assert_equal(Arrow::Int32ArrayBuilder, - @builder.get_field(-1).class) + @builder.get_column_builder(-1).class) end def test_too_negative - assert_nil(@builder.get_field(-@fields.size - 1)) + assert_nil(@builder.get_column_builder(-@fields.size - 1)) end def test_too_large - assert_nil(@builder.get_field(@fields.size)) + assert_nil(@builder.get_column_builder(@fields.size)) end end def test_flush + require_gi_bindings(3, 3, 1) arrays = { "visible" => build_boolean_array([true, false, true]), "point" => build_int32_array([1, -1, 0]), } arrays.each_with_index do |(_, array), i| - @builder.get_field(i).append_values(array.values, []) + @builder.get_column_builder(i).append_values(array.values, []) end assert_equal(build_record_batch(arrays), @builder.flush) @@ -76,7 +78,7 @@ def test_flush "point" => build_int32_array([10, -10]), } arrays.each_with_index do |(_, array), i| - @builder.get_field(i).append_values(array.values, []) + @builder.get_column_builder(i).append_values(array.values, []) end assert_equal(build_record_batch(arrays), @builder.flush) diff --git a/c_glib/test/test-sparse-union-array.rb b/c_glib/test/test-sparse-union-array.rb new file mode 100644 index 0000000000000..721f95c1fbec6 --- /dev/null +++ b/c_glib/test/test-sparse-union-array.rb @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestSparseUnionArray < Test::Unit::TestCase + include Helper::Buildable + + def setup + type_ids = build_int8_array([0, 1, nil, 1, 0]) + fields = [ + build_int16_array([1, nil, nil, nil, 5]), + build_string_array([nil, "b", nil, "d", nil]), + ] + @array = Arrow::SparseUnionArray.new(type_ids, fields) + end + + def test_value_data_type + fields = [ + Arrow::Field.new("0", Arrow::Int16DataType.new), + Arrow::Field.new("1", Arrow::StringDataType.new), + ] + assert_equal(Arrow::SparseUnionDataType.new(fields, [0, 1]), + @array.value_data_type) + end + + def test_field + assert_equal([ + build_int16_array([1, nil, nil, nil, 5]), + build_string_array([nil, "b", nil, "d", nil]), + ], + [ + @array.get_field(0), + @array.get_field(1), + ]) + end +end diff --git a/c_glib/test/test-sparse-union-data-type.rb b/c_glib/test/test-sparse-union-data-type.rb new file mode 100644 index 0000000000000..30e24f7a11c9b --- /dev/null +++ b/c_glib/test/test-sparse-union-data-type.rb @@ -0,0 +1,60 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
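Unlike the dense variant earlier in this diff, the sparse-union constructor in TestSparseUnionArray takes no value offsets: each child is the same length as type_ids, and unselected slots hold null. A condensed sketch mirroring the test's setup (build_* helpers again from Helper::Buildable):

    type_ids = build_int8_array([0, 1, nil, 1, 0])
    children = [
      build_int16_array([1, nil, nil, nil, 5]),
      build_string_array([nil, "b", nil, "d", nil]),
    ]
    array = Arrow::SparseUnionArray.new(type_ids, children)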
+ +class TestSparseUnionDataType < Test::Unit::TestCase + def setup + @number_field_data_type = Arrow::Int32DataType.new + @text_field_data_type = Arrow::StringDataType.new + @field_data_types = [ + @number_field_data_type, + @text_field_data_type, + ] + @number_field = Arrow::Field.new("number", @number_field_data_type) + @text_field = Arrow::Field.new("text", @text_field_data_type) + @fields = [ + @number_field, + @text_field, + ] + @data_type = Arrow::SparseUnionDataType.new(@fields, [2, 9]) + end + + def test_type + assert_equal(Arrow::Type::UNION, @data_type.id) + end + + def test_to_s + assert_equal("union[sparse]", + @data_type.to_s) + end + + def test_fields + assert_equal(@fields.zip(@field_data_types), + @data_type.fields.collect {|field| [field, field.data_type]}) + end + + def test_get_field + field = @data_type.get_field(0) + assert_equal([ + @fields[0], + @field_data_types[0], + ], + [ + field, + field.data_type, + ]) + end +end diff --git a/c_glib/test/test-string-array.rb b/c_glib/test/test-string-array.rb index a9edb0ae49152..61459edbb8059 100644 --- a/c_glib/test/test-string-array.rb +++ b/c_glib/test/test-string-array.rb @@ -31,15 +31,15 @@ def test_new def test_value builder = Arrow::StringArrayBuilder.new - builder.append("Hello") + builder.append_value("Hello") array = builder.finish assert_equal("Hello", array.get_string(0)) end def test_buffer builder = Arrow::StringArrayBuilder.new - builder.append("Hello") - builder.append("World") + builder.append_value("Hello") + builder.append_value("World") array = builder.finish assert_equal("HelloWorld", array.buffer.data.to_s) end diff --git a/c_glib/test/test-struct-array.rb b/c_glib/test/test-struct-array.rb index 78760a9b30984..af7e299d8b7ce 100644 --- a/c_glib/test/test-struct-array.rb +++ b/c_glib/test/test-struct-array.rb @@ -58,13 +58,13 @@ def test_flatten data_type = Arrow::StructDataType.new(fields) builder = Arrow::StructArrayBuilder.new(data_type) - builder.append - builder.get_field_builder(0).append(-29) - builder.get_field_builder(1).append(true) + builder.append_value + builder.get_field_builder(0).append_value(-29) + builder.get_field_builder(1).append_value(true) - builder.append - builder.field_builders[0].append(2) - builder.field_builders[1].append(false) + builder.append_value + builder.field_builders[0].append_value(2) + builder.field_builders[1].append_value(false) array = builder.finish values = array.length.times.collect do |i| diff --git a/c_glib/test/test-struct-data-type.rb b/c_glib/test/test-struct-data-type.rb index ce94e41c70148..82ce19ec6a495 100644 --- a/c_glib/test/test-struct-data-type.rb +++ b/c_glib/test/test-struct-data-type.rb @@ -17,8 +17,14 @@ class TestStructDataType < Test::Unit::TestCase def setup - @enabled_field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) - @message_field = Arrow::Field.new("message", Arrow::StringDataType.new) + @enabled_field_data_type = Arrow::BooleanDataType.new + @message_field_data_type = Arrow::StringDataType.new + @field_data_types = [ + @enabled_field_data_type, + @message_field_data_type, + ] + @enabled_field = Arrow::Field.new("enabled", @enabled_field_data_type) + @message_field = Arrow::Field.new("message", @message_field_data_type) @fields = [@enabled_field, @message_field] @data_type = Arrow::StructDataType.new(@fields) end @@ -37,7 +43,8 @@ def test_n_fields end def test_fields - assert_equal(@fields, @data_type.fields) + assert_equal(@fields.zip(@field_data_types), + @data_type.fields.collect {|field| [field, field.data_type]}) 
end sub_test_case("#get_field") do @@ -52,6 +59,18 @@ def test_negative def test_over assert_equal(nil, @data_type.get_field(2)) end + + def test_data_type + field = @data_type.get_field(0) + assert_equal([ + @fields[0], + @field_data_types[0], + ], + [ + field, + field.data_type, + ]) + end end sub_test_case("#get_field_by_name") do @@ -64,9 +83,21 @@ def test_not_found assert_equal(nil, @data_type.get_field_by_name("nonexistent")) end + + def test_data_type + field = @data_type.get_field_by_name("enabled") + assert_equal([ + @enabled_field, + @enabled_field_data_type, + ], + [ + field, + field.data_type, + ]) + end end - sub_test_case("#get_field_by_name") do + sub_test_case("#get_field_index") do def test_found assert_equal(@fields.index(@enabled_field), @data_type.get_field_index("enabled")) diff --git a/c_glib/test/test-table.rb b/c_glib/test/test-table.rb index 4394ad1353e7d..871e0d7c5ffd4 100644 --- a/c_glib/test/test-table.rb +++ b/c_glib/test/test-table.rb @@ -17,21 +17,19 @@ class TestTable < Test::Unit::TestCase include Helper::Buildable + include Helper::Omittable sub_test_case(".new") do - def test_columns - fields = [ + def setup + @fields = [ Arrow::Field.new("visible", Arrow::BooleanDataType.new), Arrow::Field.new("valid", Arrow::BooleanDataType.new), ] - schema = Arrow::Schema.new(fields) - columns = [ - Arrow::Column.new(fields[0], build_boolean_array([true])), - Arrow::Column.new(fields[1], build_boolean_array([false])), - ] - table = Arrow::Table.new(schema, columns) + @schema = Arrow::Schema.new(@fields) + end - data = table.n_columns.times.collect do |i| + def dump_table(table) + table.n_columns.times.collect do |i| column = table.get_column(i) values = [] column.data.chunks.each do |chunk| @@ -44,11 +42,54 @@ def test_columns values, ] end + end + + def test_columns + columns = [ + Arrow::Column.new(@fields[0], build_boolean_array([true])), + Arrow::Column.new(@fields[1], build_boolean_array([false])), + ] + table = Arrow::Table.new(@schema, columns) assert_equal([ ["visible", [true]], ["valid", [false]], ], - data) + dump_table(table)) + end + + def test_arrays + require_gi_bindings(3, 3, 1) + arrays = [ + build_boolean_array([true]), + build_boolean_array([false]), + ] + table = Arrow::Table.new(@schema, arrays) + assert_equal([ + ["visible", [true]], + ["valid", [false]], + ], + dump_table(table)) + end + + def test_record_batches + require_gi_bindings(3, 3, 1) + record_batches = [ + build_record_batch({ + "visible" => build_boolean_array([true]), + "valid" => build_boolean_array([false]) + }), + build_record_batch({ + "visible" => build_boolean_array([false]), + "valid" => build_boolean_array([true]) + }), + ] + table = Arrow::Table.new(@schema, record_batches) + + assert_equal([ + ["visible", [true, false]], + ["valid", [false, true]], + ], + dump_table(table)) end end diff --git a/c_glib/test/test-tensor.rb b/c_glib/test/test-tensor.rb index 4f18011c047d8..31f2556c4e604 100644 --- a/c_glib/test/test-tensor.rb +++ b/c_glib/test/test-tensor.rb @@ -66,12 +66,12 @@ def test_buffer end def test_shape - require_gi_bindings(3, 1, 2) + require_gi_bindings(3, 3, 1) assert_equal(@shape, @tensor.shape) end def test_strides - require_gi_bindings(3, 1, 2) + require_gi_bindings(3, 3, 1) assert_equal([4, 2, 1], @tensor.strides) end diff --git a/c_glib/test/test-uint16-array.rb b/c_glib/test/test-uint16-array.rb index 1362c8e7ff507..baa6934e4f4e2 100644 --- a/c_glib/test/test-uint16-array.rb +++ b/c_glib/test/test-uint16-array.rb @@ -29,16 +29,16 @@ def test_new def 
test_buffer builder = Arrow::UInt16ArrayBuilder.new - builder.append(1) - builder.append(2) - builder.append(4) + builder.append_value(1) + builder.append_value(2) + builder.append_value(4) array = builder.finish assert_equal([1, 2, 4].pack("S*"), array.buffer.data.to_s) end def test_value builder = Arrow::UInt16ArrayBuilder.new - builder.append(1) + builder.append_value(1) array = builder.finish assert_equal(1, array.get_value(0)) end @@ -46,9 +46,9 @@ def test_value def test_values require_gi_bindings(3, 1, 7) builder = Arrow::UInt16ArrayBuilder.new - builder.append(1) - builder.append(2) - builder.append(4) + builder.append_value(1) + builder.append_value(2) + builder.append_value(4) array = builder.finish assert_equal([1, 2, 4], array.values) end diff --git a/c_glib/test/test-uint32-array.rb b/c_glib/test/test-uint32-array.rb index 01b3edb353ff2..b9efb4cf00403 100644 --- a/c_glib/test/test-uint32-array.rb +++ b/c_glib/test/test-uint32-array.rb @@ -29,16 +29,16 @@ def test_new def test_buffer builder = Arrow::UInt32ArrayBuilder.new - builder.append(1) - builder.append(2) - builder.append(4) + builder.append_value(1) + builder.append_value(2) + builder.append_value(4) array = builder.finish assert_equal([1, 2, 4].pack("L*"), array.buffer.data.to_s) end def test_value builder = Arrow::UInt32ArrayBuilder.new - builder.append(1) + builder.append_value(1) array = builder.finish assert_equal(1, array.get_value(0)) end @@ -46,9 +46,9 @@ def test_value def test_values require_gi_bindings(3, 1, 7) builder = Arrow::UInt32ArrayBuilder.new - builder.append(1) - builder.append(2) - builder.append(4) + builder.append_value(1) + builder.append_value(2) + builder.append_value(4) array = builder.finish assert_equal([1, 2, 4], array.values) end diff --git a/c_glib/test/test-uint64-array.rb b/c_glib/test/test-uint64-array.rb index a002af269293c..b4275cefdd9b8 100644 --- a/c_glib/test/test-uint64-array.rb +++ b/c_glib/test/test-uint64-array.rb @@ -29,16 +29,16 @@ def test_new def test_buffer builder = Arrow::UInt64ArrayBuilder.new - builder.append(1) - builder.append(2) - builder.append(4) + builder.append_value(1) + builder.append_value(2) + builder.append_value(4) array = builder.finish assert_equal([1, 2, 4].pack("Q*"), array.buffer.data.to_s) end def test_value builder = Arrow::UInt64ArrayBuilder.new - builder.append(1) + builder.append_value(1) array = builder.finish assert_equal(1, array.get_value(0)) end @@ -46,9 +46,9 @@ def test_value def test_values require_gi_bindings(3, 1, 7) builder = Arrow::UInt64ArrayBuilder.new - builder.append(1) - builder.append(2) - builder.append(4) + builder.append_value(1) + builder.append_value(2) + builder.append_value(4) array = builder.finish assert_equal([1, 2, 4], array.values) end diff --git a/c_glib/test/test-uint8-array.rb b/c_glib/test/test-uint8-array.rb index 9137e53be70e5..08dfb3064cccb 100644 --- a/c_glib/test/test-uint8-array.rb +++ b/c_glib/test/test-uint8-array.rb @@ -28,25 +28,25 @@ def test_new def test_buffer builder = Arrow::UInt8ArrayBuilder.new - builder.append(1) - builder.append(2) - builder.append(4) + builder.append_value(1) + builder.append_value(2) + builder.append_value(4) array = builder.finish assert_equal([1, 2, 4].pack("C*"), array.buffer.data.to_s) end def test_value builder = Arrow::UInt8ArrayBuilder.new - builder.append(1) + builder.append_value(1) array = builder.finish assert_equal(1, array.get_value(0)) end def test_values builder = Arrow::UInt8ArrayBuilder.new - builder.append(1) - builder.append(2) - builder.append(4) + 
builder.append_value(1) + builder.append_value(2) + builder.append_value(4) array = builder.finish assert_equal([1, 2, 4], array.values) end diff --git a/ci/appveyor-build.bat b/ci/appveyor-build.bat index cfd451c5b896a..6e554199f08ea 100644 --- a/ci/appveyor-build.bat +++ b/ci/appveyor-build.bat @@ -22,7 +22,9 @@ if "%JOB%" == "Rust" ( ) else ( git config core.symlinks true git reset --hard - if "%JOB%"=="Cmake_Script_Tests" ( + if "%JOB:~,5%" == "MinGW" ( + call ci\appveyor-cpp-build-mingw.bat + ) else if "%JOB%" == "Cmake_Script_Tests" ( call ci\appveyor-cpp-test-cmake-script.bat ) else ( call ci\appveyor-cpp-build.bat diff --git a/ci/appveyor-cpp-build-mingw.bat b/ci/appveyor-cpp-build-mingw.bat new file mode 100644 index 0000000000000..4d3992745496a --- /dev/null +++ b/ci/appveyor-cpp-build-mingw.bat @@ -0,0 +1,61 @@ +@rem Licensed to the Apache Software Foundation (ASF) under one +@rem or more contributor license agreements. See the NOTICE file +@rem distributed with this work for additional information +@rem regarding copyright ownership. The ASF licenses this file +@rem to you under the Apache License, Version 2.0 (the +@rem "License"); you may not use this file except in compliance +@rem with the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, +@rem software distributed under the License is distributed on an +@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +@rem KIND, either express or implied. See the License for the +@rem specific language governing permissions and limitations +@rem under the License. + +@echo on + +set CMAKE_BUILD_TYPE=release +set MESON_BUILD_TYPE=release + +set INSTALL_DIR=%HOMEDRIVE%%HOMEPATH%\install +set PATH=%INSTALL_DIR%\bin;%PATH% +set PKG_CONFIG_PATH=%INSTALL_DIR%\lib\pkgconfig + +set CPP_BUILD_DIR=cpp\build +mkdir %CPP_BUILD_DIR% +pushd %CPP_BUILD_DIR% + +set BOOST_ROOT=%MINGW_PREFIX% +set LZ4_HOME=%MINGW_PREFIX% +set ZSTD_HOME=%MINGW_PREFIX% +set SNAPPY_HOME=%MINGW_PREFIX% +set BROTLI_HOME=%MINGW_PREFIX% +set FLATBUFFERS_HOME=%MINGW_PREFIX% +cmake ^ + -G "MSYS Makefiles" ^ + -DCMAKE_INSTALL_PREFIX=%INSTALL_DIR% ^ + -DCMAKE_BUILD_TYPE=%CMAKE_BUILD_TYPE% ^ + -DARROW_VERBOSE_THIRDPARTY_BUILD=OFF ^ + -DARROW_JEMALLOC=OFF ^ + -DARROW_USE_GLOG=OFF ^ + -DARROW_PYTHON=ON ^ + -DPythonInterp_FIND_VERSION=ON ^ + -DPythonInterp_FIND_VERSION_MAJOR=3 ^ + .. 
|| exit /B +make -j4 || exit /B +make install || exit /B +popd + +set C_GLIB_BUILD_DIR=c_glib\build +meson ^ + setup ^ + --prefix=%INSTALL_DIR% ^ + --buildtype=%MESON_BUILD_TYPE% ^ + %C_GLIB_BUILD_DIR% ^ + c_glib || exit /B +sed -i'' -s 's/\r//g' %C_GLIB_BUILD_DIR%/arrow-glib/version.h || exit /B +ninja -C %C_GLIB_BUILD_DIR% || exit /B +ninja -C %C_GLIB_BUILD_DIR% install || exit /B diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat index 91212a63fe3ac..f95b88e7bb892 100644 --- a/ci/appveyor-cpp-build.bat +++ b/ci/appveyor-cpp-build.bat @@ -34,6 +34,8 @@ if "%JOB%" == "Static_Crt_Build" ( -DARROW_USE_STATIC_CRT=ON ^ -DARROW_BOOST_USE_SHARED=OFF ^ -DARROW_BUILD_SHARED=OFF ^ + -DARROW_BUILD_TESTS=ON ^ + -DARROW_BUILD_EXAMPLES=ON ^ -DCMAKE_BUILD_TYPE=Debug ^ -DARROW_TEST_LINKAGE=static ^ -DARROW_CXXFLAGS="/MP" ^ @@ -51,6 +53,8 @@ if "%JOB%" == "Static_Crt_Build" ( -DARROW_USE_STATIC_CRT=ON ^ -DARROW_BOOST_USE_SHARED=OFF ^ -DARROW_BUILD_SHARED=OFF ^ + -DARROW_BUILD_TESTS=ON ^ + -DARROW_BUILD_EXAMPLES=ON ^ -DCMAKE_BUILD_TYPE=Release ^ -DARROW_TEST_LINKAGE=static ^ -DCMAKE_CXX_FLAGS_RELEASE="/MT %CMAKE_CXX_FLAGS_RELEASE%" ^ @@ -76,6 +80,8 @@ if "%JOB%" == "Build_Debug" ( cmake -G "%GENERATOR%" ^ -DARROW_VERBOSE_THIRDPARTY_BUILD=OFF ^ -DARROW_BOOST_USE_SHARED=OFF ^ + -DARROW_BUILD_TESTS=ON ^ + -DARROW_BUILD_EXAMPLES=ON ^ -DCMAKE_BUILD_TYPE=%CONFIGURATION% ^ -DARROW_BUILD_STATIC=OFF ^ -DARROW_CXXFLAGS="/MP" ^ @@ -89,23 +95,31 @@ if "%JOB%" == "Build_Debug" ( exit /B 0 ) -conda create -n arrow -q -y ^ +conda create -n arrow -q -y -c conda-forge ^ + --file=ci\conda_env_python.yml ^ python=%PYTHON% ^ - six pytest setuptools numpy pandas cython ^ - thrift-cpp=0.11.0 boost-cpp ^ - -c conda-forge + numpy=1.14 ^ + thrift-cpp=0.11 ^ + boost-cpp call activate arrow +set ARROW_LLVM_VERSION=6.0.1 + +if "%ARROW_BUILD_GANDIVA%" == "ON" ( + @rem Install llvmdev in the toolchain if building gandiva.dll + conda install -q -y llvmdev=%ARROW_LLVM_VERSION% || exit /B +) + @rem Use Boost from Anaconda set BOOST_ROOT=%CONDA_PREFIX%\Library set BOOST_LIBRARYDIR=%CONDA_PREFIX%\Library\lib if "%JOB%" == "Toolchain" ( @rem Install pre-built "toolchain" packages for faster builds - conda install -q -y --file=ci\conda_env_cpp.yml ^ - python=%PYTHON% ^ - -c conda-forge + conda install -q -y -c conda-forge ^ + --file=ci\conda_env_cpp.yml ^ + python=%PYTHON% set ARROW_BUILD_TOOLCHAIN=%CONDA_PREFIX%\Library ) diff --git a/ci/appveyor-cpp-setup-mingw.bat b/ci/appveyor-cpp-setup-mingw.bat new file mode 100644 index 0000000000000..471e7426f6e8f --- /dev/null +++ b/ci/appveyor-cpp-setup-mingw.bat @@ -0,0 +1,36 @@ +@rem Licensed to the Apache Software Foundation (ASF) under one +@rem or more contributor license agreements. See the NOTICE file +@rem distributed with this work for additional information +@rem regarding copyright ownership. The ASF licenses this file +@rem to you under the Apache License, Version 2.0 (the +@rem "License"); you may not use this file except in compliance +@rem with the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, +@rem software distributed under the License is distributed on an +@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +@rem KIND, either express or implied. See the License for the +@rem specific language governing permissions and limitations +@rem under the License. 
+ +@echo on + +set PATH=%MINGW_PREFIX%\bin;C:\msys64\usr\bin;%PATH% + +pacman -S --noconfirm ^ + "%MINGW_PACKAGE_PREFIX%-boost" ^ + "%MINGW_PACKAGE_PREFIX%-brotli" ^ + "%MINGW_PACKAGE_PREFIX%-cmake" ^ + "%MINGW_PACKAGE_PREFIX%-flatbuffers" ^ + "%MINGW_PACKAGE_PREFIX%-gcc" ^ + "%MINGW_PACKAGE_PREFIX%-gobject-introspection" ^ + "%MINGW_PACKAGE_PREFIX%-gtk-doc" ^ + "%MINGW_PACKAGE_PREFIX%-lz4" ^ + "%MINGW_PACKAGE_PREFIX%-meson" ^ + "%MINGW_PACKAGE_PREFIX%-protobuf" ^ + "%MINGW_PACKAGE_PREFIX%-python3-numpy" ^ + "%MINGW_PACKAGE_PREFIX%-snappy" ^ + "%MINGW_PACKAGE_PREFIX%-zlib" ^ + "%MINGW_PACKAGE_PREFIX%-zstd" || exit /B diff --git a/ci/appveyor-cpp-test-cmake-script.bat b/ci/appveyor-cpp-test-cmake-script.bat index 25bf9bddbbf39..415406c4ac366 100644 --- a/ci/appveyor-cpp-test-cmake-script.bat +++ b/ci/appveyor-cpp-test-cmake-script.bat @@ -32,6 +32,8 @@ set FLATBUFFERS_HOME=WrongPath cmake -G "%GENERATOR%" ^ -DARROW_BOOST_USE_SHARED=OFF ^ + -DARROW_BUILD_TESTS=ON ^ + -DARROW_BUILD_EXAMPLES=ON ^ -DCMAKE_BUILD_TYPE=%CONFIGURATION% ^ -DARROW_CXXFLAGS="/MP" ^ .. >nul 2>error.txt @@ -49,6 +51,8 @@ set GFLAGS_HOME=WrongPath cmake -G "%GENERATOR%" ^ -DARROW_BOOST_USE_SHARED=OFF ^ + -DARROW_BUILD_TESTS=ON ^ + -DARROW_BUILD_EXAMPLES=ON ^ -DCMAKE_BUILD_TYPE=%CONFIGURATION% ^ -DARROW_CXXFLAGS="/MP" ^ .. >nul 2>error.txt @@ -66,6 +70,8 @@ set SNAPPY_HOME=WrongPath cmake -G "%GENERATOR%" ^ -DARROW_BOOST_USE_SHARED=OFF ^ + -DARROW_BUILD_TESTS=ON ^ + -DARROW_BUILD_EXAMPLES=ON ^ -DCMAKE_BUILD_TYPE=%CONFIGURATION% ^ -DARROW_CXXFLAGS="/MP" ^ .. >nul 2>error.txt @@ -83,6 +89,8 @@ set ZLIB_HOME=WrongPath cmake -G "%GENERATOR%" ^ -DARROW_BOOST_USE_SHARED=OFF ^ + -DARROW_BUILD_TESTS=ON ^ + -DARROW_BUILD_EXAMPLES=ON ^ -DCMAKE_BUILD_TYPE=%CONFIGURATION% ^ -DARROW_CXXFLAGS="/MP" ^ .. >nul 2>error.txt @@ -100,6 +108,8 @@ set BROTLI_HOME=WrongPath cmake -G "%GENERATOR%" ^ -DARROW_BOOST_USE_SHARED=OFF ^ + -DARROW_BUILD_TESTS=ON ^ + -DARROW_BUILD_EXAMPLES=ON ^ -DCMAKE_BUILD_TYPE=%CONFIGURATION% ^ -DARROW_CXXFLAGS="/MP" ^ .. >nul 2>error.txt @@ -117,6 +127,8 @@ set LZ4_HOME=WrongPath cmake -G "%GENERATOR%" ^ -DARROW_BOOST_USE_SHARED=OFF ^ + -DARROW_BUILD_TESTS=ON ^ + -DARROW_BUILD_EXAMPLES=ON ^ -DCMAKE_BUILD_TYPE=%CONFIGURATION% ^ -DARROW_CXXFLAGS="/MP" ^ .. >nul 2>error.txt @@ -134,6 +146,8 @@ set ZSTD_HOME=WrongPath cmake -G "%GENERATOR%" ^ -DARROW_BOOST_USE_SHARED=OFF ^ + -DARROW_BUILD_TESTS=ON ^ + -DARROW_BUILD_EXAMPLES=ON ^ -DCMAKE_BUILD_TYPE=%CONFIGURATION% ^ -DARROW_CXXFLAGS="/MP" ^ .. >nul 2>error.txt @@ -158,6 +172,8 @@ pushd %BUILD_DIR% set ARROW_BUILD_TOOLCHAIN=%CONDA_PREFIX%\Library cmake -G "%GENERATOR%" ^ -DARROW_BOOST_USE_SHARED=OFF ^ + -DARROW_BUILD_TESTS=ON ^ + -DARROW_BUILD_EXAMPLES=ON ^ -DCMAKE_BUILD_TYPE=%CONFIGURATION% ^ -DARROW_CXXFLAGS="/MP" ^ .. 
2>output.txt diff --git a/ci/appveyor-filter-changes.bat b/ci/appveyor-filter-changes.bat index e6f008f83a299..e78f91f53150c 100644 --- a/ci/appveyor-filter-changes.bat +++ b/ci/appveyor-filter-changes.bat @@ -22,6 +22,13 @@ if "%JOB%" == "Rust" ( echo === appveyor exit ) +) else if "%JOB%" == "MinGW" ( + if "%ARROW_CI_GLIB_AFFECTED%" == "0" ( + echo === + echo === No C++, or GLib changes, exiting job + echo === + appveyor exit + ) ) else ( if "%ARROW_CI_PYTHON_AFFECTED%" == "0" ( echo === diff --git a/ci/appveyor-install.bat b/ci/appveyor-install.bat index 483f262368656..3ab8f38f68c4a 100644 --- a/ci/appveyor-install.bat +++ b/ci/appveyor-install.bat @@ -25,7 +25,11 @@ if "%JOB%" == "Rust" ( rustup install nightly rustc -Vv cargo -V +) else if "%JOB:~,5%" == "MinGW" ( + call ci\appveyor-cpp-setup-mingw.bat ) else ( set "PATH=C:\Miniconda36-x64;C:\Miniconda36-x64\Scripts;C:\Miniconda36-x64\Library\bin;%PATH%" + set BOOST_ROOT=C:\Libraries\boost_1_67_0 + set BOOST_LIBRARYDIR=C:\Libraries\boost_1_67_0\lib64-msvc-14.0 call ci\appveyor-cpp-setup.bat ) diff --git a/ci/conda_env_cpp.yml b/ci/conda_env_cpp.yml index 1e22e9017fc62..3d50b210ea68d 100644 --- a/ci/conda_env_cpp.yml +++ b/ci/conda_env_cpp.yml @@ -15,7 +15,9 @@ # specific language governing permissions and limitations # under the License. -boost-cpp +# ARROW-4056: The conda-forge boost 1.69.0 seems to break the Parquet unit +# tests with Xcode 8.3. Root cause not yet determined +boost-cpp=1.68.0 brotli bzip2 cmake @@ -23,6 +25,7 @@ double-conversion flatbuffers gflags glog +gmock gtest libprotobuf lz4-c diff --git a/ci/conda_env_python.yml b/ci/conda_env_python.yml index 37ec65496ebcc..b51f5c32f3297 100644 --- a/ci/conda_env_python.yml +++ b/ci/conda_env_python.yml @@ -16,10 +16,10 @@ # under the License. cython -nomkl +cloudpickle +hypothesis numpy pandas pytest -python setuptools setuptools_scm diff --git a/ci/conda_env_sphinx.yml b/ci/conda_env_sphinx.yml new file mode 100644 index 0000000000000..af6b4077dd7fa --- /dev/null +++ b/ci/conda_env_sphinx.yml @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Requirements for building the documentation +breathe +doxygen +ipython +sphinx +sphinx_rtd_theme diff --git a/ci/conda_env_unix.yml b/ci/conda_env_unix.yml new file mode 100644 index 0000000000000..9ecf549b504eb --- /dev/null +++ b/ci/conda_env_unix.yml @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# conda package dependencies specific to Unix-like environments (Linux and macOS)
+
+autoconf
+rsync
diff --git a/ci/cpp-msvc-build-main.bat b/ci/cpp-msvc-build-main.bat
index ef961b2e0f26e..779af154bedb0 100644
--- a/ci/cpp-msvc-build-main.bat
+++ b/ci/cpp-msvc-build-main.bat
@@ -44,18 +44,23 @@ mkdir cpp\build
 pushd cpp\build
 cmake -G "%GENERATOR%" %CMAKE_ARGS% ^
+ -DCMAKE_VERBOSE_MAKEFILE=OFF ^
 -DCMAKE_INSTALL_PREFIX=%CONDA_PREFIX%\Library ^
 -DARROW_BOOST_USE_SHARED=OFF ^
 -DCMAKE_BUILD_TYPE=%CONFIGURATION% ^
 -DARROW_BUILD_STATIC=OFF ^
+ -DARROW_BUILD_TESTS=ON ^
+ -DARROW_BUILD_EXAMPLES=ON ^
+ -DARROW_VERBOSE_THIRDPARTY_BUILD=ON ^
 -DARROW_CXXFLAGS="%ARROW_CXXFLAGS%" ^
 -DCMAKE_CXX_FLAGS_RELEASE="/MD %CMAKE_CXX_FLAGS_RELEASE%" ^
+ -DARROW_GANDIVA=%ARROW_BUILD_GANDIVA% ^
 -DARROW_PARQUET=ON ^
 -DARROW_PYTHON=ON ^
 .. || exit /B
 cmake --build . --target install --config %CONFIGURATION% || exit /B

-@rem Needed so python-test.exe works
+@rem Needed so arrow-python-test.exe works
 set OLD_PYTHONHOME=%PYTHONHOME%
 set PYTHONHOME=%CONDA_PREFIX%
@@ -70,7 +76,7 @@ popd

 pushd python

-pip install pickle5
+pip install -r requirements.txt pickle5

 set PYARROW_CXXFLAGS=%ARROW_CXXFLAGS%
 set PYARROW_CMAKE_GENERATOR=%GENERATOR%
@@ -112,6 +118,6 @@ pip install %WHEEL_PATH% || exit /B
 python -c "import pyarrow" || exit /B
 python -c "import pyarrow.parquet" || exit /B

-pip install pandas pickle5 pytest pytest-faulthandler || exit /B
+pip install pandas pickle5 pytest pytest-faulthandler hypothesis || exit /B

 py.test -r sxX --durations=15 --pyargs pyarrow.tests || exit /B
diff --git a/ci/detect-changes.py b/ci/detect-changes.py
index e9a647c5e6d9c..102dc56396c45 100644
--- a/ci/detect-changes.py
+++ b/ci/detect-changes.py
@@ -26,7 +26,7 @@
 perr = functools.partial(print, file=sys.stderr)

-LANGUAGE_TOPICS = ['c_glib', 'cpp', 'go', 'java', 'js', 'python',
+LANGUAGE_TOPICS = ['c_glib', 'cpp', 'docs', 'go', 'java', 'js', 'python',
                    'r', 'ruby', 'rust']

 ALL_TOPICS = LANGUAGE_TOPICS + ['integration', 'site', 'dev']
diff --git a/ci/docker_build_c_glib.sh b/ci/docker_build_c_glib.sh
index 28ef9011f1e23..0135781f6ccb2 100755
--- a/ci/docker_build_c_glib.sh
+++ b/ci/docker_build_c_glib.sh
@@ -22,7 +22,7 @@ set -e
 export ARROW_C_GLIB_HOME=$CONDA_PREFIX

 export CFLAGS="-DARROW_NO_DEPRECATED_API"
-export CXXFLAGS="-DARROW_NO_DEPRECATED_API -D_GLIBCXX_USE_CXX11_ABI=0"
+export CXXFLAGS="-DARROW_NO_DEPRECATED_API"

 mkdir -p /build/c_glib
diff --git a/ci/docker_build_cpp.sh b/ci/docker_build_cpp.sh
index f1cf43fd1c3ba..450dc870249b6 100755
--- a/ci/docker_build_cpp.sh
+++ b/ci/docker_build_cpp.sh
@@ -17,25 +17,21 @@
 # under the License.
set -e -set -o xtrace -# Arrow specific environment variables -export ARROW_BUILD_TOOLCHAIN=$CONDA_PREFIX -export ARROW_HOME=$CONDA_PREFIX -export PARQUET_HOME=$CONDA_PREFIX +source_dir=${1:-/arrow/cpp} +build_dir=${2:-/build/cpp} +install_dir=${3:-${ARROW_HOME:-/usr/local}} -# https://arrow.apache.org/docs/python/development.html#known-issues -export CXXFLAGS="-D_GLIBCXX_USE_CXX11_ABI=0" - -mkdir -p /build/cpp -pushd /build/cpp +mkdir -p ${build_dir} +pushd ${build_dir} cmake -GNinja \ -DCMAKE_BUILD_TYPE=${ARROW_BUILD_TYPE:-debug} \ - -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \ - -DARROW_ORC=ON \ - -DARROW_PLASMA=ON \ - -DARROW_PARQUET=ON \ + -DCMAKE_INSTALL_PREFIX=${install_dir} \ + -DCMAKE_INSTALL_LIBDIR=lib \ + -DARROW_ORC=${ARROW_ORC:-ON} \ + -DARROW_PLASMA=${ARROW_PLASMA:-ON} \ + -DARROW_PARQUET=${ARROW_PARQUET:-ON} \ -DARROW_HDFS=${ARROW_HDFS:-OFF} \ -DARROW_PYTHON=${ARROW_PYTHON:-OFF} \ -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS:-OFF} \ @@ -43,7 +39,7 @@ cmake -GNinja \ -DARROW_INSTALL_NAME_RPATH=${ARROW_INSTALL_NAME_RPATH:-ON} \ -DARROW_EXTRA_ERROR_CONTEXT=ON \ -DCMAKE_CXX_FLAGS=$CXXFLAGS \ - /arrow/cpp + ${source_dir} ninja ninja install diff --git a/cpp/src/arrow/util/variant/CMakeLists.txt b/ci/docker_build_java.sh old mode 100644 new mode 100755 similarity index 76% rename from cpp/src/arrow/util/variant/CMakeLists.txt rename to ci/docker_build_java.sh index 0ebb2516246ed..0cbd00f816d06 --- a/cpp/src/arrow/util/variant/CMakeLists.txt +++ b/ci/docker_build_java.sh @@ -1,3 +1,4 @@ +#!/usr/bin/env bash # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -15,14 +16,17 @@ # specific language governing permissions and limitations # under the License. -####################################### -# arrow_util_variant -####################################### +set -e -install(FILES - optional.h - recursive_wrapper.h - variant_cast.h - variant_io.h - variant_visitor.h - DESTINATION include/arrow/util/variant) +# /arrow/java is read-only +mkdir -p /build/java + +arrow_src=/build/java/arrow + +pushd /arrow + rsync -a header java format integration $arrow_src +popd + +pushd $arrow_src/java + mvn -DskipTests -Drat.skip=true install +popd diff --git a/ci/docker_build_python.sh b/ci/docker_build_python.sh index e89a0b44d6fd0..36b31b99737be 100755 --- a/ci/docker_build_python.sh +++ b/ci/docker_build_python.sh @@ -18,22 +18,34 @@ set -e -export ARROW_BUILD_TOOLCHAIN=$CONDA_PREFIX -export ARROW_HOME=$CONDA_PREFIX +source_dir=${1:-/arrow/python} +build_dir=${2:-/build/python} -# For newer GCC per https://arrow.apache.org/docs/python/development.html#known-issues -export CXXFLAGS="-D_GLIBCXX_USE_CXX11_ABI=0" export PYARROW_CXXFLAGS=$CXXFLAGS export PYARROW_CMAKE_GENERATOR=Ninja +export PYARROW_BUILD_TYPE=${PYARROW_BUILD_TYPE:-debug} + +# Feature flags +export SETUPTOOLS_SCM_VERSION_WRITE_TO_PREFIX=$build_dir +export PYARROW_WITH_ORC=${PYARROW_WITH_ORC:-1} +export PYARROW_WITH_PARQUET=${PYARROW_WITH_PARQUET:-1} +export PYARROW_WITH_PLASMA=${PYARROW_WITH_PLASMA:-1} # Build pyarrow -pushd /arrow/python +pushd ${source_dir} + # hacky again, setuptools_scm writes _generated_version.py before pyarrow + # directory is created by setuptools + mkdir -p $build_dir/pyarrow -python setup.py build_ext \ - --build-temp=/build/python \ - --build-type=${PYARROW_BUILD_TYPE:-debug} \ - --with-parquet \ - --with-plasma \ - install + relative_build_dir=$(realpath --relative-to=. 
$build_dir) + # this is a nightmare, but prevents mutating the source directory + # which is bind mounted as readonly + python setup.py build_ext --build-temp $relative_build_dir \ + --build-lib $relative_build_dir \ + build_py --build-lib $relative_build_dir \ + egg_info --egg-base $relative_build_dir \ + install_lib --build-dir $relative_build_dir \ + install --single-version-externally-managed \ + --record $relative_build_dir/record.txt popd diff --git a/ci/docker_build_r.sh b/ci/docker_build_r.sh index 9fb95bc88cce3..6e676784aff16 100755 --- a/ci/docker_build_r.sh +++ b/ci/docker_build_r.sh @@ -21,10 +21,6 @@ set -e export ARROW_BUILD_TOOLCHAIN=$CONDA_PREFIX export ARROW_HOME=$CONDA_PREFIX -# For newer GCC per https://arrow.apache.org/docs/python/development.html#known-issues -export CXXFLAGS="-D_GLIBCXX_USE_CXX11_ABI=0" -export PKG_CXXFLAGS=$CXXFLAGS - # Build arrow pushd /arrow/r diff --git a/dev/dask_integration.sh b/ci/docker_build_sphinx.sh similarity index 78% rename from dev/dask_integration.sh rename to ci/docker_build_sphinx.sh index d344328b6af1e..4a65f8155fb16 100755 --- a/dev/dask_integration.sh +++ b/ci/docker_build_sphinx.sh @@ -16,6 +16,13 @@ # limitations under the License. # -# Pass the service name to run_docker_compose.sh -# Which validates environment and runs the service -exec "$(dirname ${BASH_SOURCE})"/run_docker_compose.sh dask_integration +set -ex + +pushd /arrow/cpp/apidoc +doxygen +popd + +sphinx-build -b html /arrow/docs/source /arrow/docs/_build/html + +mkdir -p /arrow/site/asf-site/docs/latest +rsync -r /arrow/docs/_build/html/ /arrow/site/asf-site/docs/latest/ diff --git a/ci/docker_install_conda.sh b/ci/docker_install_conda.sh index 427ee76e8e256..73c7162b98926 100755 --- a/ci/docker_install_conda.sh +++ b/ci/docker_install_conda.sh @@ -27,3 +27,16 @@ rm /tmp/miniconda.sh ln -s ${CONDA_PREFIX}/etc/profile.d/conda.sh /etc/profile.d/conda.sh echo ". 
${CONDA_PREFIX}/etc/profile.d/conda.sh" >> ~/.bashrc echo "conda activate base" >> ~/.bashrc + +# Configure conda +source $MINICONDA/etc/profile.d/conda.sh +conda config --set show_channel_urls True + +# Help with SSL timeouts to S3 +conda config --set remote_connect_timeout_secs 12 + +# Setup conda-forge +conda config --add channels conda-forge + +# Update packages +conda update --all -q -y diff --git a/ci/rust-build-main.bat b/ci/rust-build-main.bat index c8a51fef6ec46..b36a97acf51ac 100644 --- a/ci/rust-build-main.bat +++ b/ci/rust-build-main.bat @@ -17,35 +17,18 @@ @rem The "main" Rust build script for Windows CI +@rem Retrieve git submodules, configure env var for Parquet unit tests +git submodule update --init || exit /B +set PARQUET_TEST_DATA=%CD%\cpp\submodules\parquet-testing\data pushd rust -@echo =================================== -@echo Build with stable toolchain -@echo =================================== - -rustup default stable -rustup show -cargo build --target %TARGET% -cargo build --target %TARGET% --release -@echo Test (debug) -@echo ------------ -cargo test --target %TARGET% -@echo -@echo Test (release) -@echo -------------- -cargo test --target %TARGET% --release - @echo =================================== @echo Build with nightly toolchain @echo =================================== rustup default nightly rustup show -cargo build --target %TARGET% || exit /B cargo build --target %TARGET% --release || exit /B -@echo Test (debug) -@echo ------------ -cargo test --target %TARGET% || exit /B @echo @echo Test (release) @echo -------------- @@ -53,8 +36,10 @@ cargo test --target %TARGET% --release || exit /B @echo @echo Run example (release) @echo --------------------- +cd arrow cargo run --example builders --target %TARGET% --release || exit /B cargo run --example dynamic_types --target %TARGET% --release || exit /B cargo run --example read_csv --target %TARGET% --release || exit /B +cargo run --example read_csv_infer_schema --target %TARGET% --release || exit /B popd diff --git a/ci/travis_before_script_c_glib.sh b/ci/travis_before_script_c_glib.sh index 7cd1c2a064396..e8dd0cdc80d2e 100755 --- a/ci/travis_before_script_c_glib.sh +++ b/ci/travis_before_script_c_glib.sh @@ -44,22 +44,8 @@ gem install test-unit gobject-introspection if [ $TRAVIS_OS_NAME = "osx" ]; then sudo env PKG_CONFIG_PATH=$PKG_CONFIG_PATH luarocks install lgi else - if [ $BUILD_TORCH_EXAMPLE = "yes" ]; then - git clone \ - --quiet \ - --depth 1 \ - --recursive \ - https://github.com/torch/distro.git ~/torch - pushd ~/torch - ./install-deps > /dev/null - echo "yes" | ./install.sh > /dev/null - . 
~/torch/install/bin/torch-activate - popd - luarocks install lgi - else - sudo apt install -y -qq luarocks - sudo luarocks install lgi - fi + sudo apt install -y -qq luarocks + sudo luarocks install lgi fi pushd $ARROW_C_GLIB_DIR diff --git a/ci/travis_before_script_cpp.sh b/ci/travis_before_script_cpp.sh index f9e0602a80971..76ae9a66e8100 100755 --- a/ci/travis_before_script_cpp.sh +++ b/ci/travis_before_script_cpp.sh @@ -40,8 +40,15 @@ if [ "$only_library_mode" == "no" ]; then source $TRAVIS_BUILD_DIR/ci/travis_install_conda.sh fi +if [ "$ARROW_TRAVIS_USE_TOOLCHAIN" == "1" ]; then + # Set up C++ toolchain from conda-forge packages for faster builds + source $TRAVIS_BUILD_DIR/ci/travis_install_toolchain.sh +fi + +mkdir -p $ARROW_CPP_BUILD_DIR +pushd $ARROW_CPP_BUILD_DIR + CMAKE_COMMON_FLAGS="\ --DARROW_BUILD_BENCHMARKS=ON \ -DCMAKE_INSTALL_PREFIX=$ARROW_CPP_INSTALL \ -DARROW_NO_DEPRECATED_API=ON \ -DARROW_EXTRA_ERROR_CONTEXT=ON" @@ -49,26 +56,34 @@ CMAKE_LINUX_FLAGS="" CMAKE_OSX_FLAGS="" if [ "$ARROW_TRAVIS_USE_TOOLCHAIN" == "1" ]; then - # Set up C++ toolchain from conda-forge packages for faster builds - source $TRAVIS_BUILD_DIR/ci/travis_install_toolchain.sh CMAKE_COMMON_FLAGS="${CMAKE_COMMON_FLAGS} -DARROW_JEMALLOC=ON" CMAKE_COMMON_FLAGS="${CMAKE_COMMON_FLAGS} -DARROW_WITH_BZ2=ON" fi -mkdir -p $ARROW_CPP_BUILD_DIR -pushd $ARROW_CPP_BUILD_DIR - if [ $only_library_mode == "yes" ]; then CMAKE_COMMON_FLAGS="\ $CMAKE_COMMON_FLAGS \ --DARROW_BUILD_TESTS=OFF \ -DARROW_BUILD_UTILITIES=OFF \ -DARROW_INSTALL_NAME_RPATH=OFF" +else + CMAKE_COMMON_FLAGS="\ +$CMAKE_COMMON_FLAGS \ +-DARROW_BUILD_BENCHMARKS=ON \ +-DARROW_BUILD_TESTS=ON \ +-DARROW_BUILD_EXAMPLES=ON \ +-DARROW_BUILD_UTILITIES=ON \ +-DARROW_INSTALL_NAME_RPATH=OFF" fi +ARROW_CXXFLAGS="" + # Use Ninja for faster builds when using toolchain if [ $ARROW_TRAVIS_USE_TOOLCHAIN == "1" ]; then CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -GNinja" + if [ "$DISTRO_CODENAME" != "trusty" ]; then + # Make sure the toolchain linker (from binutils package) is picked up by clang + ARROW_CXXFLAGS="$ARROW_CXXFLAGS -B$CPP_TOOLCHAIN/bin" + fi fi if [ $ARROW_TRAVIS_PLASMA == "1" ]; then @@ -92,6 +107,9 @@ fi if [ $ARROW_TRAVIS_GANDIVA == "1" ]; then CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -DARROW_GANDIVA=ON" + if [ $ARROW_TRAVIS_GANDIVA_JAVA == "1" ]; then + CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -DARROW_GANDIVA_JAVA=ON" + fi fi if [ $ARROW_TRAVIS_VALGRIND == "1" ]; then @@ -106,15 +124,24 @@ if [ $ARROW_TRAVIS_VERBOSE == "1" ]; then CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -DARROW_VERBOSE_THIRDPARTY_BUILD=ON" fi -if [ $ARROW_TRAVIS_USE_VENDORED_BOOST == "1" ]; then +if [ $ARROW_TRAVIS_VENDORED_BOOST == "1" ]; then CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -DARROW_BOOST_VENDORED=ON" fi +if [ $ARROW_TRAVIS_STATIC_BOOST == "1" ]; then + CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -DARROW_BOOST_USE_SHARED=OFF" +fi + +if [ $ARROW_TRAVIS_OPTIONAL_INSTALL == "1" ]; then + CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -DARROW_OPTIONAL_INSTALL=ON" +fi + if [ $TRAVIS_OS_NAME == "linux" ]; then cmake $CMAKE_COMMON_FLAGS \ $CMAKE_LINUX_FLAGS \ -DCMAKE_BUILD_TYPE=$ARROW_BUILD_TYPE \ -DBUILD_WARNING_LEVEL=$ARROW_BUILD_WARNING_LEVEL \ + -DARROW_CXXFLAGS="$ARROW_CXXFLAGS" \ $ARROW_CPP_DIR else if [ "$using_homebrew" = "yes" ]; then @@ -130,8 +157,10 @@ else $ARROW_CPP_DIR fi -# Build and install libraries -$TRAVIS_MAKE -j4 +# Build and install libraries. Configure ARROW_CPP_BUILD_TARGETS environment +# variable to only build certain targets. 
If you use this, you must also set +# the environment variable ARROW_TRAVIS_OPTIONAL_INSTALL=1 +$TRAVIS_MAKE -j4 $ARROW_CPP_BUILD_TARGETS $TRAVIS_MAKE install popd diff --git a/ci/travis_env_common.sh b/ci/travis_env_common.sh index f5748b2a0452a..5f70535b42c6c 100755 --- a/ci/travis_env_common.sh +++ b/ci/travis_env_common.sh @@ -33,6 +33,8 @@ export ARROW_RUBY_DIR=$TRAVIS_BUILD_DIR/ruby export ARROW_RUST_DIR=${TRAVIS_BUILD_DIR}/rust export ARROW_R_DIR=${TRAVIS_BUILD_DIR}/r +export ARROW_TRAVIS_COVERAGE=${ARROW_TRAVIS_COVERAGE:=0} + if [ "$ARROW_TRAVIS_COVERAGE" == "1" ]; then export ARROW_CPP_COVERAGE_FILE=${TRAVIS_BUILD_DIR}/coverage.info export ARROW_PYTHON_COVERAGE_FILE=${TRAVIS_BUILD_DIR}/.coverage @@ -71,3 +73,18 @@ if [ $TRAVIS_OS_NAME == "osx" ]; then fi export PARQUET_TEST_DATA=$TRAVIS_BUILD_DIR/cpp/submodules/parquet-testing/data + +# e.g. "trusty" or "xenial" +if [ $TRAVIS_OS_NAME == "linux" ]; then + export DISTRO_CODENAME=`lsb_release -s -c` +fi + +if [ "$ARROW_TRAVIS_USE_SYSTEM_JAVA" == "1" ]; then + # Use the Ubuntu-provided OpenJDK + unset JAVA_HOME + export TRAVIS_MVN=/usr/bin/mvn + export TRAVIS_JAVA=/usr/bin/java +else + export TRAVIS_MVN=mvn + export TRAVIS_JAVA=java +fi diff --git a/ci/travis_install_cargo.sh b/ci/travis_install_cargo.sh index f433033091ce1..e4a6b3b3493f3 100755 --- a/ci/travis_install_cargo.sh +++ b/ci/travis_install_cargo.sh @@ -21,6 +21,7 @@ set -e # ensure that both toolchains are installed rustup install stable +rustup component add rustfmt rustup install nightly pip install 'travis-cargo<0.2' --user diff --git a/ci/travis_install_clang_tools.sh b/ci/travis_install_clang_tools.sh index 49b2e47762121..9e974db5fb7cc 100755 --- a/ci/travis_install_clang_tools.sh +++ b/ci/travis_install_clang_tools.sh @@ -17,8 +17,13 @@ # specific language governing permissions and limitations # under the License. -wget -O - http://llvm.org/apt/llvm-snapshot.gpg.key|sudo apt-key add - + +set -ex + +source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh + +wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add - sudo apt-add-repository -y \ - "deb http://llvm.org/apt/trusty/ llvm-toolchain-trusty-6.0 main" + "deb https://apt.llvm.org/$DISTRO_CODENAME/ llvm-toolchain-$DISTRO_CODENAME-6.0 main" sudo apt-get update -qq sudo apt-get install -q clang-6.0 clang-format-6.0 clang-tidy-6.0 diff --git a/ci/travis_install_conda.sh b/ci/travis_install_conda.sh index ade6392ce24a2..49a2f21ef6793 100755 --- a/ci/travis_install_conda.sh +++ b/ci/travis_install_conda.sh @@ -67,7 +67,6 @@ else # Help with SSL timeouts to S3 conda config --set remote_connect_timeout_secs 12 - conda config --add channels https://repo.continuum.io/pkgs/free conda config --add channels conda-forge fi diff --git a/ci/travis_install_linux.sh b/ci/travis_install_linux.sh index 98d9bdd924bfa..b8fe63a3ff4bc 100755 --- a/ci/travis_install_linux.sh +++ b/ci/travis_install_linux.sh @@ -17,25 +17,43 @@ # specific language governing permissions and limitations # under the License. 
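As a concrete illustration of the `ARROW_CPP_BUILD_TARGETS` comment in `travis_before_script_cpp.sh` above, a minimal sketch of how a CI job could opt into a partial build follows. The target names are illustrative, not a command taken verbatim from these scripts (elsewhere in this change, `travis_script_python.sh` builds the `plasma` and `parquet` targets):

```shell
# Sketch only: build and install just the Plasma and Parquet targets.
# ARROW_TRAVIS_OPTIONAL_INSTALL=1 turns on -DARROW_OPTIONAL_INSTALL=ON,
# so "make install" silently skips components that were never built.
export ARROW_TRAVIS_OPTIONAL_INSTALL=1
export ARROW_CPP_BUILD_TARGETS="plasma parquet"
$TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh
```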
-sudo apt-get install -y -q \ +set -e + +source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh + +sudo apt-get install -y -qq \ gdb binutils ccache libboost-dev libboost-filesystem-dev \ libboost-system-dev libboost-regex-dev if [ "$CXX" == "g++-4.9" ]; then - sudo apt-get install -y -q g++-4.9 + sudo apt-get install -y -qq g++-4.9 fi if [ "$ARROW_TRAVIS_VALGRIND" == "1" ]; then - sudo apt-get install -y -q valgrind + sudo apt-get install -y -qq valgrind fi if [ "$ARROW_TRAVIS_COVERAGE" == "1" ]; then - sudo apt-get install -y -q lcov + sudo apt-get install -y -qq lcov fi -if [ "$ARROW_TRAVIS_GANDIVA" == "1" -a "$ARROW_USE_TOOLCHAIN" != "1" ]; then - sudo add-apt-repository -y ppa:dluxen/cmake-backports - sudo apt-get update -q - sudo apt-get install -y -q cmake3 - sudo rm -rf /usr/local/cmake-* +set -x +if [ "$DISTRO_CODENAME" != "trusty" ]; then + if [ "$ARROW_TRAVIS_GANDIVA" == "1" ]; then + sudo apt-get install -y -qq llvm-6.0-dev + fi + + sudo apt-get install -y -qq maven + + # Remove Travis-specific versions of Java + sudo rm -rf /usr/local/lib/jvm* + sudo rm -rf /usr/local/maven* + hash -r + unset JAVA_HOME + + which java + which mvn + java -version + mvn -v fi + diff --git a/ci/travis_install_osx.sh b/ci/travis_install_osx.sh index 83ca4a70bc364..6b6a4b2533d8b 100755 --- a/ci/travis_install_osx.sh +++ b/ci/travis_install_osx.sh @@ -17,11 +17,34 @@ # specific language governing permissions and limitations # under the License. +set -x set -e if [ "$ARROW_CI_RUBY_AFFECTED" = "1" ]; then - brew update - brew upgrade python - brew uninstall postgis - brew bundle --file=$TRAVIS_BUILD_DIR/c_glib/Brewfile + brew_log_path=brew.log + function run_brew() { + local i=0 + local n_tries=3 + while [[ $((i++)) < ${n_tries} ]]; do + echo "${i}: brew" "$@" >> ${brew_log_path} + if gtimeout --signal=KILL 9m brew "$@" >> ${brew_log_path} 2>&1; then + break + elif [[ ${i} == ${n_tries} ]]; then + cat ${brew_log_path} + rm ${brew_log_path} + false + fi + done + } + + # ARROW-3976 Old versions of git can cause failures when Homebrew prints a + # donation solicitation. Attempt to update git + git --version + run_brew upgrade git + + run_brew update + run_brew upgrade python + run_brew uninstall postgis + run_brew bundle --file=$TRAVIS_BUILD_DIR/c_glib/Brewfile --verbose + rm ${brew_log_path} fi diff --git a/ci/travis_install_toolchain.sh b/ci/travis_install_toolchain.sh index 86ac56d043b96..7ba1f79e009b2 100755 --- a/ci/travis_install_toolchain.sh +++ b/ci/travis_install_toolchain.sh @@ -22,16 +22,32 @@ source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh source $TRAVIS_BUILD_DIR/ci/travis_install_conda.sh if [ ! 
-e $CPP_TOOLCHAIN ]; then + CONDA_PACKAGES="" + CONDA_LABEL="" + if [ $ARROW_TRAVIS_GANDIVA == "1" ] && [ $TRAVIS_OS_NAME == "osx" ]; then - CONDA_LLVM="llvmdev=6.0.1" + CONDA_PACKAGES="$CONDA_PACKAGES llvmdev=6.0.1" + fi + + if [ $TRAVIS_OS_NAME == "linux" ]; then + if [ "$DISTRO_CODENAME" == "trusty" ]; then + CONDA_LABEL=" -c conda-forge/label/cf201901" + else + # Use newer binutils when linking against conda-provided libraries + CONDA_PACKAGES="$CONDA_PACKAGES binutils" + fi + fi + + if [ $ARROW_TRAVIS_VALGRIND == "1" ]; then + # Use newer Valgrind + CONDA_PACKAGES="$CONDA_PACKAGES valgrind" fi # Set up C++ toolchain from conda-forge packages for faster builds - conda create -y -q -p $CPP_TOOLCHAIN \ + conda create -y -q -p $CPP_TOOLCHAIN $CONDA_LABEL \ --file=$TRAVIS_BUILD_DIR/ci/conda_env_cpp.yml \ - ${CONDA_LLVM} \ + $CONDA_PACKAGES \ ccache \ - curl \ ninja \ nomkl \ python=3.6 diff --git a/ci/travis_script_c_glib.sh b/ci/travis_script_c_glib.sh index adecc5c742967..c42a047ddf445 100755 --- a/ci/travis_script_c_glib.sh +++ b/ci/travis_script_c_glib.sh @@ -32,19 +32,10 @@ arrow_c_glib_run_test() export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:$arrow_c_glib_lib_dir/pkgconfig pushd example/lua - if [ "$BUILD_TORCH_EXAMPLE" = "yes" ]; then - . ~/torch/install/bin/torch-activate - luajit write-batch.lua - luajit read-batch.lua - luajit write-stream.lua - luajit read-stream.lua - luajit stream-to-torch-tensor.lua - else - lua write-batch.lua - lua read-batch.lua - lua write-stream.lua - lua read-stream.lua - fi + lua write-batch.lua + lua read-batch.lua + lua write-stream.lua + lua read-stream.lua popd } diff --git a/ci/travis_script_cpp.sh b/ci/travis_script_cpp.sh index b89e5b73bf00f..14529b03160f5 100755 --- a/ci/travis_script_cpp.sh +++ b/ci/travis_script_cpp.sh @@ -30,7 +30,7 @@ popd # Capture C++ coverage info (we wipe the build dir in travis_script_python.sh) if [ "$ARROW_TRAVIS_COVERAGE" == "1" ]; then pushd $TRAVIS_BUILD_DIR - lcov --quiet --directory . --capture --no-external --output-file $ARROW_CPP_COVERAGE_FILE \ - 2>&1 | grep -v "WARNING: no data found for /usr/include" + lcov --directory . 
--capture --no-external --output-file $ARROW_CPP_COVERAGE_FILE \ + 2>&1 | grep -v "ignoring data for external file" popd fi diff --git a/ci/travis_script_gandiva_java.sh b/ci/travis_script_gandiva_java.sh index 1f188e7e91dd4..387be9a092b98 100755 --- a/ci/travis_script_gandiva_java.sh +++ b/ci/travis_script_gandiva_java.sh @@ -24,12 +24,10 @@ JAVA_DIR=${TRAVIS_BUILD_DIR}/java pushd $JAVA_DIR -export MAVEN_OPTS="$MAVEN_OPTS -Dorg.slf4j.simpleLogger.defaultLogLevel=warn" - # build with gandiva profile -mvn -P gandiva -B install -DskipTests -Dgandiva.cpp.build.dir=$CPP_BUILD_DIR/debug +$TRAVIS_MVN -P gandiva -B install -DskipTests -Dgandiva.cpp.build.dir=$CPP_BUILD_DIR/debug # run gandiva tests -mvn test -P gandiva -pl gandiva -Dgandiva.cpp.build.dir=$CPP_BUILD_DIR/debug +$TRAVIS_MVN test -P gandiva -pl gandiva -Dgandiva.cpp.build.dir=$CPP_BUILD_DIR/debug popd diff --git a/ci/travis_script_integration.sh b/ci/travis_script_integration.sh index 286acacd74004..02e2eae81509c 100755 --- a/ci/travis_script_integration.sh +++ b/ci/travis_script_integration.sh @@ -28,7 +28,7 @@ export ARROW_CPP_EXE_PATH=$ARROW_CPP_BUILD_DIR/debug pushd $ARROW_JAVA_DIR echo "mvn package" -mvn -B clean package 2>&1 > mvn_package.log || (cat mvn_package.log && false) +$TRAVIS_MVN -B clean package 2>&1 > mvn_package.log || (cat mvn_package.log && false) popd @@ -36,14 +36,14 @@ pushd $ARROW_JS_DIR # lint and compile JS source npm run lint -npm run build +npm run build -- -t apache-arrow popd pushd $ARROW_INTEGRATION_DIR CONDA_ENV_NAME=arrow-integration-test -conda create -y -q -n $CONDA_ENV_NAME python=3.5 +conda create -y -q -n $CONDA_ENV_NAME python=3.6 conda activate $CONDA_ENV_NAME # faster builds, please @@ -52,7 +52,12 @@ conda install -y nomkl # Expensive dependencies install from Continuum package repo conda install -y pip numpy six -python integration_test.py --debug +# ARROW-4008: Create a directory to write temporary files since /tmp can be +# unstable in Travis CI +INTEGRATION_TEMPDIR=$TRAVIS_BUILD_DIR/integration_temp +mkdir -p $INTEGRATION_TEMPDIR + +python integration_test.py --debug --tempdir=$INTEGRATION_TEMPDIR popd diff --git a/ci/travis_script_java.sh b/ci/travis_script_java.sh index 8a71fdc4d0064..201c336268792 100755 --- a/ci/travis_script_java.sh +++ b/ci/travis_script_java.sh @@ -19,15 +19,16 @@ set -e +source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh + JAVA_DIR=${TRAVIS_BUILD_DIR}/java pushd $JAVA_DIR -export MAVEN_OPTS="$MAVEN_OPTS -Dorg.slf4j.simpleLogger.defaultLogLevel=warn" -if [ $ARROW_TRAVIS_JAVA_BUILD_ONLY == "1" ]; then +if [ "$ARROW_TRAVIS_JAVA_BUILD_ONLY" == "1" ]; then # Save time and make build less verbose by skipping tests and style checks - mvn -DskipTests=true -Dcheckstyle.skip=true -B install + $TRAVIS_MVN -DskipTests=true -Dcheckstyle.skip=true -B install else - mvn -B install + $TRAVIS_MVN -B install fi popd diff --git a/ci/travis_script_javadoc.sh b/ci/travis_script_javadoc.sh index ccfb2dab61d05..755d4628f205b 100755 --- a/ci/travis_script_javadoc.sh +++ b/ci/travis_script_javadoc.sh @@ -19,11 +19,13 @@ set -e +source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh + JAVA_DIR=${TRAVIS_BUILD_DIR}/java pushd $JAVA_DIR export MAVEN_OPTS="$MAVEN_OPTS -Dorg.slf4j.simpleLogger.defaultLogLevel=warn" -mvn -B site +$TRAVIS_MVN -B site popd diff --git a/ci/travis_script_js.sh b/ci/travis_script_js.sh index 1871b4265cd01..34b07115e70b1 100755 --- a/ci/travis_script_js.sh +++ b/ci/travis_script_js.sh @@ -23,9 +23,10 @@ source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh pushd $ARROW_JS_DIR -npm 
run lint +npm run lint:ci npm run build -# run the non-snapshot unit tests npm test +npm run test:coverage +bash <(curl -s https://codecov.io/bash) || echo "Codecov did not collect coverage reports" popd diff --git a/ci/travis_script_plasma_java_client.sh b/ci/travis_script_plasma_java_client.sh index 927a2391201c1..0b291ed32a56d 100755 --- a/ci/travis_script_plasma_java_client.sh +++ b/ci/travis_script_plasma_java_client.sh @@ -23,16 +23,15 @@ source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh PLASMA_JAVA_DIR=${TRAVIS_BUILD_DIR}/java/plasma - pushd $PLASMA_JAVA_DIR -mvn clean install +$TRAVIS_MVN clean install export LD_LIBRARY_PATH=${ARROW_CPP_INSTALL}/lib:$LD_LIBRARY_PATH export PLASMA_STORE=${ARROW_CPP_INSTALL}/bin/plasma_store_server ldd $PLASMA_STORE -java -cp target/test-classes:target/classes -Djava.library.path=${TRAVIS_BUILD_DIR}/cpp-build/debug/ org.apache.arrow.plasma.PlasmaClientTest +$TRAVIS_JAVA -cp target/test-classes:target/classes -Djava.library.path=${TRAVIS_BUILD_DIR}/cpp-build/debug/ org.apache.arrow.plasma.PlasmaClientTest popd diff --git a/ci/travis_script_python.sh b/ci/travis_script_python.sh index 608e1ce636524..27d75da74893e 100755 --- a/ci/travis_script_python.sh +++ b/ci/travis_script_python.sh @@ -32,40 +32,35 @@ PYARROW_PYTEST_FLAGS=" -r sxX --durations=15 --parquet" PYTHON_VERSION=$1 CONDA_ENV_DIR=$TRAVIS_BUILD_DIR/pyarrow-test-$PYTHON_VERSION -conda create -y -q -p $CONDA_ENV_DIR python=$PYTHON_VERSION cmake curl -conda activate $CONDA_ENV_DIR - # We should use zlib in the target Python directory to avoid loading # wrong libpython on macOS at run-time. If we use zlib in # $ARROW_BUILD_TOOLCHAIN and libpython3.6m.dylib exists in both -# $ARROW_BUILD_TOOLCHAIN and $CONDA_ENV_DIR, python-test uses +# $ARROW_BUILD_TOOLCHAIN and $CONDA_ENV_DIR, arrow-python-test uses # libpython3.6m.dylib on $ARROW_BUILD_TOOLCHAIN not $CONDA_ENV_DIR. # libpython3.6m.dylib on $ARROW_BUILD_TOOLCHAIN doesn't have NumPy. So # python-test fails. export ZLIB_HOME=$CONDA_ENV_DIR -python --version -which python - if [ $ARROW_TRAVIS_PYTHON_JVM == "1" ]; then CONDA_JVM_DEPS="jpype1" fi -conda install -y -q pip \ +conda create -y -q -p $CONDA_ENV_DIR \ + --file $TRAVIS_BUILD_DIR/ci/conda_env_python.yml \ nomkl \ - cloudpickle \ - numpy=1.13.1 \ - ${CONDA_JVM_DEPS} \ - pandas \ - cython + pip \ + numpy=1.14 \ + python=${PYTHON_VERSION} \ + ${CONDA_JVM_DEPS} + +conda activate $CONDA_ENV_DIR + +python --version +which python if [ "$ARROW_TRAVIS_PYTHON_DOCS" == "1" ] && [ "$PYTHON_VERSION" == "3.6" ]; then # Install documentation dependencies - conda install -y -q \ - ipython \ - numpydoc \ - sphinx=1.7.9 \ - sphinx_rtd_theme + conda install -y --file ci/conda_env_sphinx.yml fi # ARROW-2093: PyTorch increases the size of our conda dependency stack @@ -78,7 +73,7 @@ fi # fi if [ $TRAVIS_OS_NAME != "osx" ]; then - conda install -y -c conda-forge tensorflow + conda install -y tensorflow PYARROW_PYTEST_FLAGS="$PYARROW_PYTEST_FLAGS --tensorflow" fi @@ -92,19 +87,23 @@ rm -rf * # XXX Can we simply reuse CMAKE_COMMON_FLAGS from travis_before_script_cpp.sh? 
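To reproduce this Python CI environment outside of Travis, a rough local equivalent of the `conda create` invocation above is sketched below. The environment path and Python version are illustrative; `ci/conda_env_python.yml` is the dependency file the script itself references:

```shell
# Sketch only: recreate the pyarrow test environment locally.
conda create -y -q -p ./pyarrow-test-3.6 \
    --file ci/conda_env_python.yml \
    nomkl pip numpy=1.14 python=3.6
conda activate ./pyarrow-test-3.6
# Confirm the environment's interpreter is the one on PATH.
python --version
which python
```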
CMAKE_COMMON_FLAGS="-DARROW_EXTRA_ERROR_CONTEXT=ON" +PYTHON_CPP_BUILD_TARGETS="arrow_python-all plasma parquet" + if [ $ARROW_TRAVIS_COVERAGE == "1" ]; then CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -DARROW_GENERATE_COVERAGE=ON" fi if [ $ARROW_TRAVIS_PYTHON_GANDIVA == "1" ]; then - CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -DARROW_GANDIVA=ON -DARROW_GANDIVA_BUILD_TESTS=OFF" + CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -DARROW_GANDIVA=ON" + PYTHON_CPP_BUILD_TARGETS="$PYTHON_CPP_BUILD_TARGETS gandiva" fi cmake -GNinja \ $CMAKE_COMMON_FLAGS \ - -DARROW_BUILD_TESTS=on \ - -DARROW_TEST_INCLUDE_LABELS=python \ - -DARROW_BUILD_UTILITIES=off \ + -DARROW_BUILD_TESTS=ON \ + -DARROW_BUILD_UTILITIES=OFF \ + -DARROW_OPTIONAL_INSTALL=ON \ + -DARROW_PARQUET=on \ -DARROW_PLASMA=on \ -DARROW_TENSORFLOW=on \ -DARROW_PYTHON=on \ @@ -113,19 +112,16 @@ cmake -GNinja \ -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \ $ARROW_CPP_DIR -ninja +ninja $PYTHON_CPP_BUILD_TARGETS ninja install popd # python-test isn't run by travis_script_cpp.sh, exercise it here -$ARROW_CPP_BUILD_DIR/$ARROW_BUILD_TYPE/python-test +$ARROW_CPP_BUILD_DIR/$ARROW_BUILD_TYPE/arrow-python-test pushd $ARROW_PYTHON_DIR -# Other stuff pip install -pip install -q -r requirements.txt - if [ "$PYTHON_VERSION" == "3.6" ]; then pip install -q pickle5 fi @@ -134,6 +130,9 @@ if [ "$ARROW_TRAVIS_COVERAGE" == "1" ]; then pip install -q coverage fi +echo "=== pip list ===" +pip list + export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:$ARROW_CPP_INSTALL/lib/pkgconfig export PYARROW_BUILD_TYPE=$ARROW_BUILD_TYPE @@ -178,19 +177,21 @@ if [ "$ARROW_TRAVIS_COVERAGE" == "1" ]; then coverage report -i --include="*/_parquet.pyx" # Generate XML file for CodeCov coverage xml -i -o $TRAVIS_BUILD_DIR/coverage.xml - # Capture C++ coverage info and combine with previous coverage file + # Capture C++ coverage info pushd $TRAVIS_BUILD_DIR - lcov --quiet --directory . --capture --no-external --output-file coverage-python-tests.info \ - 2>&1 | grep -v "WARNING: no data found for /usr/include" + lcov --directory . 
--capture --no-external --output-file coverage-python-tests.info \ + 2>&1 | grep -v "ignoring data for external file" lcov --add-tracefile coverage-python-tests.info \ - --add-tracefile $ARROW_CPP_COVERAGE_FILE \ --output-file $ARROW_CPP_COVERAGE_FILE rm coverage-python-tests.info popd # $TRAVIS_BUILD_DIR fi if [ "$ARROW_TRAVIS_PYTHON_DOCS" == "1" ] && [ "$PYTHON_VERSION" == "3.6" ]; then - cd doc + pushd ../cpp/apidoc + doxygen + popd + cd ../docs sphinx-build -q -b html -d _build/doctrees -W source _build/html fi diff --git a/ci/travis_script_rust.sh b/ci/travis_script_rust.sh index 02a32cdabe818..c25d64ec42cb6 100755 --- a/ci/travis_script_rust.sh +++ b/ci/travis_script_rust.sh @@ -19,6 +19,8 @@ set -e +source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh + RUST_DIR=${TRAVIS_BUILD_DIR}/rust pushd $RUST_DIR @@ -26,22 +28,17 @@ pushd $RUST_DIR # show activated toolchain rustup show -# check code formatting only for Rust nightly -if [ $RUSTUP_TOOLCHAIN == "nightly" ] -then - # raises on any formatting errors - rustup component add rustfmt-preview - cargo fmt --all -- --check -fi - -# raises on any warnings -cargo rustc -- -D warnings +# raises on any formatting errors +cargo +stable fmt --all -- --check -cargo build +RUSTFLAGS="-D warnings" cargo build cargo test -cargo bench + +# run examples +cd arrow cargo run --example builders cargo run --example dynamic_types cargo run --example read_csv +cargo run --example read_csv_infer_schema popd diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 8436e65ba8076..e0dbcd305e92e 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -16,13 +16,10 @@ # under the License. cmake_minimum_required(VERSION 3.2) +message(STATUS "Building using CMake version: ${CMAKE_VERSION}") + +set(ARROW_VERSION "0.13.0-SNAPSHOT") -# Extract Arrow version number -file(READ "${CMAKE_CURRENT_SOURCE_DIR}/../java/pom.xml" POM_XML) -string(REGEX MATCHALL - "\n [^<]+" ARROW_VERSION_TAG "${POM_XML}") -string(REGEX REPLACE - "(\n |)" "" ARROW_VERSION "${ARROW_VERSION_TAG}") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}") @@ -50,6 +47,8 @@ message(STATUS "Arrow version: " "${ARROW_VERSION_MAJOR}.${ARROW_VERSION_MINOR}.${ARROW_VERSION_PATCH} " "(full: '${ARROW_VERSION}')") +set(ARROW_SOURCE_DIR ${PROJECT_SOURCE_DIR}) +set(ARROW_BINARY_DIR ${PROJECT_BINARY_DIR}) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules") @@ -65,6 +64,12 @@ if(POLICY CMP0054) cmake_policy(SET CMP0054 NEW) endif() +# don't ignore _ROOT variables in find_package +if(POLICY CMP0074) + # https://cmake.org/cmake/help/v3.12/policy/CMP0074.html + cmake_policy(SET CMP0074 NEW) +endif() + set(BUILD_SUPPORT_DIR "${CMAKE_SOURCE_DIR}/build-support") set(CLANG_FORMAT_VERSION "6.0") @@ -84,12 +89,6 @@ if ("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1" OR INFER_FOUND) set(CMAKE_EXPORT_COMPILE_COMMANDS 1) endif() -find_program(CCACHE_FOUND ccache) -if(CCACHE_FOUND) - set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_FOUND}) - set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_FOUND}) -endif(CCACHE_FOUND) - # ---------------------------------------------------------------------- # cmake options @@ -114,44 +113,100 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") "Run the test suite using valgrind --tool=memcheck" OFF) - option(ARROW_BUILD_TESTS - "Build the Arrow googletest unit tests" + option(ARROW_USE_ASAN + "Enable Address Sanitizer checks" + OFF) + + option(ARROW_USE_CCACHE + "Use ccache when compiling 
(if available)" ON) - set(ARROW_TEST_LINKAGE "shared" CACHE STRING - "Linkage of Arrow libraries with unit tests executables. \ -static|shared (default shared)") + option(ARROW_USE_TSAN + "Enable Thread Sanitizer checks" + OFF) - set(ARROW_TEST_INCLUDE_LABELS "" CACHE STRING - "Only build unit tests having the indicated label or labels. \ -Pass multiple labels by dividing with semicolons") + option(ARROW_BUILD_TESTS + "Build the Arrow googletest unit tests, default OFF" + OFF) option(ARROW_BUILD_BENCHMARKS - "Build the Arrow micro benchmarks" + "Build the Arrow micro benchmarks, default OFF" OFF) + option(ARROW_BUILD_EXAMPLES + "Build the Arrow examples, default OFF" + OFF) + + set(ARROW_TEST_LINKAGE "shared" CACHE STRING + "Linkage of Arrow libraries with unit tests executables. \ +static|shared (default shared)") + option(ARROW_NO_DEPRECATED_API "Exclude deprecated APIs from build" OFF) - option(ARROW_COMPUTE - "Build the Arrow Compute Modules" + option(ARROW_FUZZING + "Build Arrow Fuzzing executables" + OFF) + + # Disable this option to exercise non-SIMD fallbacks + option(ARROW_USE_SIMD + "Build with SIMD optimizations" ON) - option(ARROW_EXTRA_ERROR_CONTEXT - "Compile with extra error context (line numbers, code)" + option(ARROW_ALTIVEC + "Build Arrow with Altivec" + ON) + + option(ARROW_BUILD_UTILITIES + "Build Arrow commandline utilities" + ON) + + option(ARROW_RPATH_ORIGIN + "Build Arrow libraries with RATH set to \$ORIGIN" + OFF) + + option(ARROW_INSTALL_NAME_RPATH + "Build Arrow libraries with install_name set to @rpath" + ON) + + option(ARROW_GENERATE_COVERAGE + "Build with C++ code coverage enabled" OFF) + option(ARROW_VERBOSE_LINT + "If off, 'quiet' flags will be passed to linting tools" + OFF) + + option(ARROW_GGDB_DEBUG + "Pass -ggdb flag to debug builds" + ON) + + #---------------------------------------------------------------------- + # Project components to enable / disable building + + option(ARROW_COMPUTE + "Build the Arrow Compute Modules" + ON) + option(ARROW_FLIGHT "Build the Arrow Flight RPC System (requires GRPC, Protocol Buffers)" OFF) + option(ARROW_GANDIVA + "Build the Gandiva libraries" + OFF) + + option(ARROW_PARQUET + "Build the Parquet libraries" + OFF) + option(ARROW_IPC "Build the Arrow IPC extensions" ON) - option(ARROW_GPU - "Build the Arrow GPU extensions (requires CUDA installation)" + option(ARROW_CUDA + "Build the Arrow CUDA extensions (requires CUDA toolkit)" OFF) option(ARROW_ORC @@ -170,58 +225,49 @@ Pass multiple labels by dividing with semicolons") "Build the Arrow HDFS bridge" ON) - option(ARROW_BOOST_USE_SHARED - "Rely on boost shared libraries where relevant" - ON) - - option(ARROW_BOOST_VENDORED - "Use vendored Boost instead of existing Boost" - OFF) - - option(ARROW_PROTOBUF_USE_SHARED - "Rely on Protocol Buffers shared libraries where relevant" - OFF) - option(ARROW_PYTHON "Build the Arrow CPython extensions" OFF) - option(ARROW_FUZZING - "Build Arrow Fuzzing executables" + option(ARROW_HIVESERVER2 + "Build the HiveServer2 client and Arrow adapter" OFF) - # Disable this option to exercise non-SIMD fallbacks - option(ARROW_USE_SIMD - "Build with SIMD optimizations" - ON) + option(ARROW_PLASMA + "Build the plasma object store along with Arrow" + OFF) - option(ARROW_ALTIVEC - "Build Arrow with Altivec" - ON) + option(ARROW_PLASMA_JAVA_CLIENT + "Build the plasma object store java client" + OFF) - option(ARROW_BUILD_UTILITIES - "Build Arrow commandline utilities" - ON) + #---------------------------------------------------------------------- + 
# Thirdparty toolchain options - option(ARROW_RPATH_ORIGIN - "Build Arrow libraries with RATH set to \$ORIGIN" + option(ARROW_VERBOSE_THIRDPARTY_BUILD + "If off, output from ExternalProjects will be logged to files rather than shown" OFF) - option(ARROW_INSTALL_NAME_RPATH - "Build Arrow libraries with install_name set to @rpath" + option(ARROW_BOOST_USE_SHARED + "Rely on boost shared libraries where relevant" ON) - option(ARROW_HIVESERVER2 - "Build the HiveServer2 client and Arrow adapter" + option(ARROW_BOOST_VENDORED + "Use vendored Boost instead of existing Boost. \ +Note that this requires linking Boost statically" OFF) - option(ARROW_PLASMA - "Build the plasma object store along with Arrow" + option(ARROW_PROTOBUF_USE_SHARED + "Rely on Protocol Buffers shared libraries where relevant" OFF) - option(ARROW_PLASMA_JAVA_CLIENT - "Build the plasma object store java client" - OFF) + option(ARROW_WITH_BACKTRACE + "Build with backtrace support" + ON) + + option(ARROW_USE_GLOG + "Build libraries with glog support for pluggable logging" + ON) option(ARROW_WITH_BROTLI "Build with Brotli compression" @@ -253,21 +299,8 @@ Pass multiple labels by dividing with semicolons") "Build with zstd compression" ${ARROW_WITH_ZSTD_DEFAULT}) - option(ARROW_GENERATE_COVERAGE - "Build with C++ code coverage enabled" - OFF) - - option(ARROW_VERBOSE_THIRDPARTY_BUILD - "If off, output from ExternalProjects will be logged to files rather than shown" - OFF) - - option(ARROW_VERBOSE_LINT - "If off, 'quiet' flags will be passed to linting tools" - OFF) - - option(ARROW_USE_GLOG - "Build libraries with glog support for pluggable logging" - ON) + #---------------------------------------------------------------------- + # Windows options if (MSVC) option(ARROW_USE_CLCACHE @@ -276,8 +309,12 @@ Pass multiple labels by dividing with semicolons") set(BROTLI_MSVC_STATIC_LIB_SUFFIX "-static" CACHE STRING "Brotli static lib suffix used on Windows with MSVC (default -static)") + set(PROTOBUF_MSVC_STATIC_LIB_SUFFIX "" CACHE STRING + "Protobuf static lib suffix used on Windows with MSVC (default is empty string)") + set(RE2_MSVC_STATIC_LIB_SUFFIX "_static" CACHE STRING + "re2 static lib suffix used on Windows with MSVC (default is _static)") set(SNAPPY_MSVC_STATIC_LIB_SUFFIX "_static" CACHE STRING - "Snappy static lib suffix used on Windows with MSVC (default is empty string)") + "Snappy static lib suffix used on Windows with MSVC (default is _static)") set(LZ4_MSVC_STATIC_LIB_SUFFIX "_static" CACHE STRING "Lz4 static lib suffix used on Windows with MSVC (default _static)") set(ZSTD_MSVC_STATIC_LIB_SUFFIX "_static" CACHE STRING @@ -288,10 +325,8 @@ Pass multiple labels by dividing with semicolons") OFF) endif() - # Parquet-related build options - option(ARROW_PARQUET - "Build the Parquet libraries" - OFF) + #---------------------------------------------------------------------- + # Parquet build options option(PARQUET_MINIMAL_DEPENDENCY "Depend only on Thirdparty headers to build libparquet. \ @@ -306,9 +341,11 @@ Always OFF if building binaries" "Build the Parquet examples. Requires static libraries to be built." 
OFF) - # Gandiva related build options - option(ARROW_GANDIVA - "Build the Gandiva libraries" + #---------------------------------------------------------------------- + # Gandiva build options + + option(ARROW_GANDIVA_JAVA + "Build the Gandiva JNI wrappers" OFF) # ARROW-3860: Temporary workaround @@ -316,16 +353,41 @@ Always OFF if building binaries" "Include -static-libstdc++ -static-libgcc when linking with Gandiva static libraries" OFF) - option(ARROW_GANDIVA_JAVA - "Build the Gandiva JNI wrappers" - ON) + set(ARROW_GANDIVA_PC_CXX_FLAGS "" CACHE STRING + "Compiler flags to append when pre-compiling Gandiva operations") - option(ARROW_GANDIVA_BUILD_TESTS - "Build the Gandiva googletest unit tests" - ON) + #---------------------------------------------------------------------- + # Advanced developer options + option(ARROW_EXTRA_ERROR_CONTEXT + "Compile with extra error context (line numbers, code)" + OFF) + + option(ARROW_OPTIONAL_INSTALL + "If enabled install ONLY targets that have already been built. Please be \ +advised that if this is enabled 'install' will fail silently on components \ +that have not been built" + OFF) +endif() + +# Needed for linting targets, etc. +find_package(PythonInterp) + +if (ARROW_USE_CCACHE) + find_program(CCACHE_FOUND ccache) + if(CCACHE_FOUND) + message(STATUS "Using ccache: ${CCACHE_FOUND}") + set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_FOUND}) + set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_FOUND}) + endif(CCACHE_FOUND) endif() +if (ARROW_OPTIONAL_INSTALL) + # Don't make the "install" target depend on the "all" target + set(CMAKE_SKIP_INSTALL_ALL_DEPENDENCY true) + + set(INSTALL_IS_OPTIONAL OPTIONAL) +endif() ############################################################ # "make lint" target @@ -334,69 +396,68 @@ if (NOT ARROW_VERBOSE_LINT) set(ARROW_LINT_QUIET "--quiet") endif() -if (UNIX) +if (NOT LINT_EXCLUSIONS_FILE) + # source files matching a glob from a line in this file + # will be excluded from linting (cpplint, clang-tidy, clang-format) + set(LINT_EXCLUSIONS_FILE ${BUILD_SUPPORT_DIR}/lint_exclusions.txt) +endif() - file(GLOB_RECURSE LINT_FILES - "${CMAKE_CURRENT_SOURCE_DIR}/src/*.h" - "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cc" - ) - - FOREACH(item ${LINT_FILES}) - IF(NOT ((item MATCHES "_generated.h") OR - (item MATCHES "pyarrow_api.h") OR - (item MATCHES "pyarrow_lib.h") OR - (item MATCHES "xxhash.h") OR - (item MATCHES "xxhash.cc") OR - (item MATCHES "config.h") OR - (item MATCHES "util/date.h") OR - (item MATCHES "util/string_view/") OR - (item MATCHES "util/variant") OR - (item MATCHES "zmalloc.h") OR - (item MATCHES "ae.h"))) - LIST(APPEND FILTERED_LINT_FILES ${item}) - ENDIF() - ENDFOREACH(item ${LINT_FILES}) - - find_program(CPPLINT_BIN NAMES cpplint cpplint.py HINTS ${BUILD_SUPPORT_DIR}) - message(STATUS "Found cpplint executable at ${CPPLINT_BIN}") - - # Full lint - # Balancing act: cpplint.py takes a non-trivial time to launch, - # so process 12 files per invocation, while still ensuring parallelism - add_custom_target(lint echo ${FILTERED_LINT_FILES} | xargs -n12 -P8 - ${CPPLINT_BIN} - --verbose=2 ${ARROW_LINT_QUIET} - --linelength=90 - --filter=-whitespace/comments,-readability/todo,-build/header_guard,-build/c++11,-runtime/references,-build/include_order - ) -endif (UNIX) +find_program(CPPLINT_BIN NAMES cpplint cpplint.py HINTS ${BUILD_SUPPORT_DIR}) +message(STATUS "Found cpplint executable at ${CPPLINT_BIN}") + +add_custom_target(lint + ${PYTHON_EXECUTABLE} ${BUILD_SUPPORT_DIR}/run_cpplint.py + --cpplint_binary 
${CPPLINT_BIN} + --exclude_globs ${LINT_EXCLUSIONS_FILE} + --source_dir ${CMAKE_CURRENT_SOURCE_DIR}/src + ${ARROW_LINT_QUIET}) ############################################################ # "make format" and "make check-format" targets ############################################################ - -# runs clang format and updates files in place. -add_custom_target(format ${BUILD_SUPPORT_DIR}/run_clang_format.py - ${CLANG_FORMAT_BIN} - ${BUILD_SUPPORT_DIR}/clang_format_exclusions.txt - ${CMAKE_CURRENT_SOURCE_DIR}/src --fix ${ARROW_LINT_QUIET}) - -# runs clang format and exits with a non-zero exit code if any files need to be reformatted -add_custom_target(check-format ${BUILD_SUPPORT_DIR}/run_clang_format.py - ${CLANG_FORMAT_BIN} - ${BUILD_SUPPORT_DIR}/clang_format_exclusions.txt - ${CMAKE_CURRENT_SOURCE_DIR}/src ${ARROW_LINT_QUIET}) +if (${CLANG_FORMAT_FOUND}) + # runs clang format and updates files in place. + add_custom_target(format + ${PYTHON_EXECUTABLE} ${BUILD_SUPPORT_DIR}/run_clang_format.py + --clang_format_binary ${CLANG_FORMAT_BIN} + --exclude_globs ${LINT_EXCLUSIONS_FILE} + --source_dir ${CMAKE_CURRENT_SOURCE_DIR}/src + --fix + ${ARROW_LINT_QUIET}) + + # runs clang format and exits with a non-zero exit code if any files need to be reformatted + add_custom_target(check-format + ${PYTHON_EXECUTABLE} ${BUILD_SUPPORT_DIR}/run_clang_format.py + --clang_format_binary ${CLANG_FORMAT_BIN} + --exclude_globs ${LINT_EXCLUSIONS_FILE} + --source_dir ${CMAKE_CURRENT_SOURCE_DIR}/src + ${ARROW_LINT_QUIET}) +endif() ############################################################ # "make clang-tidy" and "make check-clang-tidy" targets ############################################################ if (${CLANG_TIDY_FOUND}) + # TODO check to make sure .clang-tidy is being respected + # runs clang-tidy and attempts to fix any warning automatically - add_custom_target(clang-tidy ${BUILD_SUPPORT_DIR}/run-clang-tidy.sh ${CLANG_TIDY_BIN} ${CMAKE_BINARY_DIR}/compile_commands.json 1 - `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc | sed -e '/_generated/g'`) + add_custom_target(clang-tidy + ${PYTHON_EXECUTABLE} ${BUILD_SUPPORT_DIR}/run_clang_tidy.py + --clang_tidy_binary ${CLANG_TIDY_BIN} + --exclude_globs ${LINT_EXCLUSIONS_FILE} + --compile_commands ${CMAKE_BINARY_DIR}/compile_commands.json + --source_dir ${CMAKE_CURRENT_SOURCE_DIR}/src + --fix + ${ARROW_LINT_QUIET}) + # runs clang-tidy and exits with a non-zero exit code if any errors are found. - add_custom_target(check-clang-tidy ${BUILD_SUPPORT_DIR}/run-clang-tidy.sh ${CLANG_TIDY_BIN} ${CMAKE_BINARY_DIR}/compile_commands.json - 0 `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc |grep -v -F -f ${CMAKE_CURRENT_SOURCE_DIR}/src/.clang-tidy-ignore | sed -e '/_generated/g'`) + add_custom_target(check-clang-tidy + ${PYTHON_EXECUTABLE} ${BUILD_SUPPORT_DIR}/run_clang_tidy.py + --clang_tidy_binary ${CLANG_TIDY_BIN} + --exclude_globs ${LINT_EXCLUSIONS_FILE} + --compile_commands ${CMAKE_BINARY_DIR}/compile_commands.json + --source_dir ${CMAKE_CURRENT_SOURCE_DIR}/src + ${ARROW_LINT_QUIET}) endif() if (ARROW_ONLY_LINT) @@ -409,11 +470,17 @@ endif() ############################################################ if(ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS) + # Currently the compression tests require at least these libraries; bz2 and + # zstd are optional. 
See ARROW-3984 set(ARROW_WITH_BROTLI ON) set(ARROW_WITH_LZ4 ON) set(ARROW_WITH_SNAPPY ON) set(ARROW_WITH_ZLIB ON) - set(ARROW_WITH_ZSTD ON) +endif() + +if(ARROW_BUILD_TESTS) + # JSON parsing of arrays is required for Arrow unit tests + set(ARROW_IPC ON) endif() if(PARQUET_BUILD_EXAMPLES OR PARQUET_BUILD_EXECUTABLES) @@ -436,20 +503,26 @@ endif() if(NOT ARROW_BUILD_TESTS) set(NO_TESTS 1) +else() + add_custom_target(all-tests) + add_custom_target(unittest ctest -L unittest) + add_dependencies(unittest all-tests) endif() if(NOT ARROW_BUILD_BENCHMARKS) set(NO_BENCHMARKS 1) +else() + add_custom_target(all-benchmarks) + add_custom_target(benchmark ctest -L benchmark) + add_dependencies(benchmark all-benchmarks) endif() -if (NOT ARROW_FUZZING) - set(NO_FUZZING 1) +if(NOT ARROW_BUILD_EXAMPLES) + set(NO_EXAMPLES 1) endif() -if (ARROW_TENSORFLOW) - # TensorFlow uses the old GLIBCXX ABI, so we have to use it too - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0") +if (NOT ARROW_FUZZING) + set(NO_FUZZING 1) endif() if (MSVC AND ARROW_USE_CLCACHE AND @@ -482,8 +555,6 @@ include(SetupCxxFlags) # Dependencies ############################################################ -add_custom_target(arrow_dependencies) - include(BuildUtils) enable_testing() @@ -635,48 +706,65 @@ endif(UNIX) ############################################################ set(ARROW_LINK_LIBS) +set(ARROW_SHARED_INSTALL_INTERFACE_LIBS) +set(ARROW_STATIC_INSTALL_INTERFACE_LIBS) # Libraries to link statically with libarrow.so set(ARROW_STATIC_LINK_LIBS double-conversion_static) +set(ARROW_STATIC_INSTALL_INTERFACE_LIBS double-conversion) if (ARROW_WITH_BROTLI) - SET(ARROW_STATIC_LINK_LIBS + list(APPEND + ARROW_STATIC_LINK_LIBS brotli_dec_static brotli_enc_static - brotli_common_static - ${ARROW_STATIC_LINK_LIBS}) + brotli_common_static) + list(APPEND + ARROW_STATIC_INSTALL_INTERFACE_LIBS + brotlidec + brotlienc + brotlicommon) endif() if (ARROW_WITH_BZ2) - SET(ARROW_STATIC_LINK_LIBS bz2_static ${ARROW_STATIC_LINK_LIBS}) + list(APPEND ARROW_STATIC_LINK_LIBS bz2_static) + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS bz2) endif() if (ARROW_WITH_LZ4) - SET(ARROW_STATIC_LINK_LIBS lz4_static ${ARROW_STATIC_LINK_LIBS}) + list(APPEND ARROW_STATIC_LINK_LIBS lz4_static) + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS lz4) endif() if (ARROW_WITH_SNAPPY) - SET(ARROW_STATIC_LINK_LIBS snappy_static ${ARROW_STATIC_LINK_LIBS}) + list(APPEND ARROW_STATIC_LINK_LIBS snappy_static) + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS snappy) endif() if (ARROW_WITH_ZLIB) - SET(ARROW_STATIC_LINK_LIBS ${ZLIB_LIBRARY} ${ARROW_STATIC_LINK_LIBS}) + list(APPEND ARROW_STATIC_LINK_LIBS ${ZLIB_LIBRARY}) + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS z) endif() if (ARROW_WITH_ZSTD) - SET(ARROW_STATIC_LINK_LIBS zstd_static ${ARROW_STATIC_LINK_LIBS}) + list(APPEND ARROW_STATIC_LINK_LIBS zstd_static) + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS zstd) endif() if (ARROW_ORC) - SET(ARROW_STATIC_LINK_LIBS - ${ARROW_STATIC_LINK_LIBS} - orc_static) + list(APPEND ARROW_STATIC_LINK_LIBS orc_static) + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS orc) endif() if (ARROW_USE_GLOG) - SET(ARROW_STATIC_LINK_LIBS glog_static ${ARROW_STATIC_LINK_LIBS}) + list(APPEND ARROW_STATIC_LINK_LIBS glog_static) + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS glog) + add_definitions("-DARROW_USE_GLOG") endif() +add_custom_target(arrow_dependencies) +add_dependencies(arrow_dependencies toolchain) + if 
(ARROW_STATIC_LINK_LIBS) add_dependencies(arrow_dependencies ${ARROW_STATIC_LINK_LIBS}) endif() @@ -687,15 +775,24 @@ set(ARROW_SHARED_PRIVATE_LINK_LIBS ${BOOST_FILESYSTEM_LIBRARY} ${BOOST_REGEX_LIBRARY}) -set(ARROW_STATIC_LINK_LIBS - ${ARROW_STATIC_LINK_LIBS} +list(APPEND + ARROW_STATIC_LINK_LIBS ${BOOST_SYSTEM_LIBRARY} ${BOOST_FILESYSTEM_LIBRARY} ${BOOST_REGEX_LIBRARY}) +list(APPEND + ARROW_STATIC_INSTALL_INTERFACE_LIBS + boost_system + boost_filesystem + boost_regex) + if (NOT MSVC) - set(ARROW_LINK_LIBS - ${ARROW_LINK_LIBS} + list(APPEND + ARROW_LINK_LIBS + ${CMAKE_DL_LIBS}) + list(APPEND + ARROW_SHARED_INSTALL_INTERFACE_LIBS ${CMAKE_DL_LIBS}) endif() @@ -703,8 +800,9 @@ set(ARROW_TEST_STATIC_LINK_LIBS arrow_testing_static arrow_static ${ARROW_LINK_LIBS} - gtest_main_static - gtest_static) + ${GTEST_LIBRARY} + ${GMOCK_MAIN_LIBRARY} + ${GMOCK_LIBRARY}) set(ARROW_TEST_SHARED_LINK_LIBS arrow_testing_shared @@ -714,8 +812,9 @@ set(ARROW_TEST_SHARED_LINK_LIBS ${BOOST_SYSTEM_LIBRARY} ${BOOST_FILESYSTEM_LIBRARY} ${BOOST_REGEX_LIBRARY} - gtest_main_static - gtest_static) + ${GTEST_LIBRARY} + ${GMOCK_MAIN_LIBRARY} + ${GMOCK_LIBRARY}) if(NOT MSVC) set(ARROW_TEST_SHARED_LINK_LIBS @@ -723,19 +822,21 @@ if(NOT MSVC) ${CMAKE_DL_LIBS}) endif() -if ("${ARROW_TEST_LINKAGE}" STREQUAL "shared") +if (ARROW_BUILD_TESTS AND "${ARROW_TEST_LINKAGE}" STREQUAL "shared") if (NOT ARROW_BUILD_SHARED) message(FATAL_ERROR "If using shared linkage for unit tests, must also \ pass ARROW_BUILD_SHARED=on") endif() # Use shared linking for unit tests if it's available set(ARROW_TEST_LINK_LIBS ${ARROW_TEST_SHARED_LINK_LIBS}) + set(ARROW_EXAMPLE_LINK_LIBS arrow_shared) else() if (NOT ARROW_BUILD_STATIC) message(FATAL_ERROR "If using static linkage for unit tests, must also \ pass ARROW_BUILD_STATIC=on") endif() set(ARROW_TEST_LINK_LIBS ${ARROW_TEST_STATIC_LINK_LIBS}) + set(ARROW_EXAMPLE_LINK_LIBS arrow_static) endif() if (ARROW_BUILD_BENCHMARKS) @@ -744,40 +845,28 @@ if (ARROW_BUILD_BENCHMARKS) ${ARROW_TEST_LINK_LIBS}) endif() +set(ARROW_SYSTEM_LINK_LIBS) + if (ARROW_JEMALLOC) add_definitions(-DARROW_JEMALLOC) add_definitions(-DARROW_JEMALLOC_INCLUDE_DIR=${JEMALLOC_INCLUDE_DIR}) - - if (NOT WIN32 AND NOT APPLE) - set(ARROW_JEMALLOC_LINK_LIBS - jemalloc_static - # For glibc <2.17 we need to link to librt. - # As we compile with --as-needed by default, the linker will omit this - # dependency if not required. 
- rt - ) - else() - set(ARROW_JEMALLOC_LINK_LIBS - jemalloc_static - ) - endif() - set(ARROW_SHARED_PRIVATE_LINK_LIBS - ${ARROW_SHARED_PRIVATE_LINK_LIBS} - ${ARROW_JEMALLOC_LINK_LIBS}) - set(ARROW_STATIC_LINK_LIBS - ${ARROW_STATIC_LINK_LIBS} - ${ARROW_JEMALLOC_LINK_LIBS}) + list(APPEND ARROW_SYSTEM_LINK_LIBS jemalloc_static) endif(ARROW_JEMALLOC) -if (PTHREAD_LIBRARY) - set(ARROW_LINK_LIBS - ${ARROW_LINK_LIBS} - pthreadshared) - set(ARROW_STATIC_LINK_LIBS - ${ARROW_STATIC_LINK_LIBS} - pthreadshared) +if (THREADS_FOUND) + list(APPEND ARROW_SYSTEM_LINK_LIBS Threads::Threads) endif() +if (NOT WIN32 AND NOT APPLE) + # Pass -lrt on Linux only + list(APPEND ARROW_SYSTEM_LINK_LIBS rt) +endif() + +list(APPEND ARROW_LINK_LIBS ${ARROW_SYSTEM_LINK_LIBS}) +list(APPEND ARROW_STATIC_LINK_LIBS ${ARROW_SYSTEM_LINK_LIBS}) +list(APPEND ARROW_SHARED_INSTALL_INTERFACE_LIBS ${ARROW_SYSTEM_LINK_LIBS}) +list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${ARROW_SYSTEM_LINK_LIBS}) + ############################################################ # Subdirectories ############################################################ @@ -788,28 +877,23 @@ endif() add_subdirectory(src/arrow) -if(ARROW_FLIGHT) - add_subdirectory(src/arrow/flight) -endif() - -if(ARROW_PYTHON) - add_subdirectory(src/arrow/python) -endif() - -if(ARROW_HIVESERVER2) - add_subdirectory(src/arrow/dbi/hiveserver2) -endif() - if(ARROW_PARQUET) add_subdirectory(src/parquet) add_subdirectory(tools/parquet) - add_subdirectory(examples/parquet/low-level-api) + if (PARQUET_BUILD_EXAMPLES) + add_subdirectory(examples/parquet) + endif() endif() if(ARROW_GANDIVA) add_subdirectory(src/gandiva) endif() +if(ARROW_BUILD_EXAMPLES) + add_custom_target(runexample ctest -L example) + add_subdirectory(examples/arrow) +endif() + include(CMakePackageConfigHelpers) # Makes the project importable from the build directory diff --git a/cpp/Dockerfile b/cpp/Dockerfile index 4ec8f0f3bf183..17d332d22bed3 100644 --- a/cpp/Dockerfile +++ b/cpp/Dockerfile @@ -18,35 +18,41 @@ FROM ubuntu:18.04 # install build essentials -RUN apt-get update -y -q && \ +RUN export DEBIAN_FRONTEND=noninteractive && \ + apt-get update -y -q && \ apt-get install -y -q --no-install-recommends \ - autoconf \ - automake \ - ca-certificates \ - ccache \ - g++ \ - gcc \ - git \ - ninja-build \ - pkg-config \ - wget + ca-certificates \ + ccache \ + g++ \ + gcc \ + git \ + ninja-build \ + pkg-config \ + tzdata \ + wget # install conda and required packages +ARG EXTRA_CONDA_PKGS ENV PATH=/opt/conda/bin:$PATH \ CONDA_PREFIX=/opt/conda ADD ci/docker_install_conda.sh \ ci/conda_env_cpp.yml \ + ci/conda_env_unix.yml \ /arrow/ci/ RUN arrow/ci/docker_install_conda.sh && \ - conda install -c conda-forge \ - --file arrow/ci/conda_env_cpp.yml && \ + conda install -q -c conda-forge \ + --file arrow/ci/conda_env_cpp.yml \ + --file arrow/ci/conda_env_unix.yml \ + $EXTRA_CONDA_PKGS && \ conda clean --all ENV CC=gcc \ CXX=g++ \ - ARROW_BUILD_TESTS=ON + ARROW_BUILD_TESTS=ON \ + ARROW_BUILD_TOOLCHAIN=$CONDA_PREFIX \ + ARROW_HOME=$CONDA_PREFIX \ + PARQUET_HOME=$CONDA_PREFIX # build and test CMD arrow/ci/docker_build_cpp.sh && \ - cd /build/cpp && \ - ctest -j2 --output-on-failure -L unittest + cd /build/cpp && ctest -j2 --output-on-failure -L unittest diff --git a/cpp/Dockerfile.alpine b/cpp/Dockerfile.alpine new file mode 100644 index 0000000000000..3c412e613bc2c --- /dev/null +++ b/cpp/Dockerfile.alpine @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +FROM alpine + +# install dependencies +RUN apk add --no-cache -q \ + autoconf \ + bash \ + bison \ + boost-dev \ + cmake \ + flex \ + g++ \ + gcc \ + git \ + gzip \ + make \ + musl-dev \ + ninja \ + wget \ + zlib-dev + +ENV CC=gcc \ + CXX=g++ \ + ARROW_ORC=OFF \ + ARROW_PARQUET=OFF \ + ARROW_BUILD_TESTS=ON \ + ARROW_HOME=/usr/local + +# build and test +CMD arrow/ci/docker_build_cpp.sh && \ + cd /build/cpp && ctest -j2 --output-on-failure -L unittest diff --git a/cpp/README.md b/cpp/README.md index fcf913723974b..7312a31f23779 100644 --- a/cpp/README.md +++ b/cpp/README.md @@ -30,16 +30,41 @@ in-source and out-of-source builds with the latter one being preferred. Building Arrow requires: * A C++11-enabled compiler. On Linux, gcc 4.8 and higher should be sufficient. -* CMake +* CMake 3.2 or higher * Boost +* Bison/flex (for building Apache Thrift from source only, +a parquet dependency.) + +Testing arrow with ctest requires: + +* python On Ubuntu/Debian you can install the requirements with: ```shell -sudo apt-get install cmake \ +sudo apt-get install \ + autoconf \ + build-essential \ + cmake \ libboost-dev \ libboost-filesystem-dev \ - libboost-system-dev + libboost-regex-dev \ + libboost-system-dev \ + python \ + bison \ + flex +``` + +On Alpine Linux: + +```shell +apk add autoconf \ + bash \ + boost-dev \ + cmake \ + g++ \ + gcc \ + make ``` On macOS, you can use [Homebrew][1]: @@ -54,25 +79,29 @@ If you are developing on Windows, see the [Windows developer guide][2]. ## Building Arrow -Simple debug build: +Simple release build: git clone https://github.com/apache/arrow.git cd arrow/cpp - mkdir debug - cd debug - cmake .. + mkdir release + cd release + cmake -DARROW_BUILD_TESTS=ON .. make unittest -Simple release build: +Simple debug build: git clone https://github.com/apache/arrow.git cd arrow/cpp - mkdir release - cd release - cmake .. -DCMAKE_BUILD_TYPE=Release + mkdir debug + cd debug + cmake -DCMAKE_BUILD_TYPE=Debug -DARROW_BUILD_TESTS=ON .. make unittest -Detailed unit test logs will be placed in the build directory under `build/test-logs`. +If you do not need to build the test suite, you can omit the +`ARROW_BUILD_TESTS` option (the default is not to build the unit tests). + +Detailed unit test logs will be placed in the build directory under +`build/test-logs`. On some Linux distributions, running the test suite might require setting an explicit locale. 
If you see any locale-related errors, try setting the @@ -82,7 +111,35 @@ environment variable (which requires the `locales` package or equivalent): export LC_ALL="en_US.UTF-8" ``` -## Building and Developing Parquet Libraries +## Modular Build Targets + +Since there are several major parts of the C++ project, we have provided +modular CMake targets for building each library component, group of unit tests +and benchmarks, and their dependencies: + +* `make arrow` for Arrow core libraries +* `make parquet` for Parquet libraries +* `make gandiva` for Gandiva (LLVM expression compiler) libraries +* `make plasma` for Plasma libraries, server + +To build the unit tests or benchmarks, add `-tests` or `-benchmarks` to the +target name. So `make arrow-tests` will build the Arrow core unit tests. Using +the `-all` target, e.g. `parquet-all`, will build everything. + +If you wish to only build and install one or more project subcomponents, we +have provided the CMake option `ARROW_OPTIONAL_INSTALL` to only install targets +that have been built. For example, if you only wish to build the Parquet +libraries, its tests, and its dependencies, you can run: + +``` +cmake .. -DARROW_PARQUET=ON -DARROW_OPTIONAL_INSTALL=ON -DARROW_BUILD_TESTS=ON +make parquet +make install +``` + +If you omit an explicit target when invoking `make`, all targets will be built. + +## Parquet Development Notes To build the C++ libraries for Apache Parquet, add the flag `-DARROW_PARQUET=ON` when invoking CMake. The Parquet libraries and unit tests @@ -117,10 +174,10 @@ not use the macro. Follow the directions for simple build except run cmake with the `--ARROW_BUILD_BENCHMARKS` parameter set correctly: - cmake -DARROW_BUILD_BENCHMARKS=ON .. + cmake -DARROW_BUILD_TESTS=ON -DARROW_BUILD_BENCHMARKS=ON .. and instead of make unittest run either `make; ctest` to run both unit tests -and benchmarks or `make runbenchmark` to run only the benchmark tests. +and benchmarks or `make benchmark` to run only the benchmark tests. Benchmark logs will be placed in the build directory under `build/benchmark-logs`. @@ -204,13 +261,11 @@ The Python library must be built against the same Python version for which you are building pyarrow, e.g. Python 2.7 or Python 3.6. NumPy must also be installed. -### Building GPU extension library (optional) +### Building CUDA extension library (optional) -The optional `arrow_gpu` shared library can be built by passing -`-DARROW_GPU=on`. This requires a CUDA installation to build, and to use many -of the functions you must have a functioning GPU. Currently only CUDA -functionality is supported, though if there is demand we can also add OpenCL -interfaces in this library as needed. +The optional `arrow_cuda` shared library can be built by passing +`-DARROW_CUDA=on`. This requires a CUDA installation to build, and to use many +of the functions you must have a functioning CUDA-compatible GPU. The CUDA toolchain used to build the library can be customized by using the `$CUDA_HOME` environment variable. @@ -252,7 +307,7 @@ The optional `gandiva` libraries and tests can be built by passing `-DARROW_GANDIVA=on`. ```shell -cmake .. -DARROW_GANDIVA=on +cmake .. -DARROW_GANDIVA=ON -DARROW_BUILD_TESTS=ON make ctest -L gandiva ``` @@ -260,6 +315,55 @@ ctest -L gandiva This library is still in Alpha stages, and subject to API changes without deprecation warnings. 
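Tying the Modular Build Targets section earlier in this README to the Gandiva flags above, a Gandiva-focused development loop could look like the following sketch. The `gandiva-all` target is inferred from the `-all` naming convention documented in that section rather than taken verbatim from this change:

```shell
# Sketch only: configure once, then iterate on Gandiva by itself.
cmake .. -DARROW_GANDIVA=ON -DARROW_BUILD_TESTS=ON
make gandiva-all   # Gandiva libraries plus their unit tests
ctest -L gandiva   # run only the tests labeled "gandiva"
```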
+### Building and developing Flight (optional) + +In addition to the Arrow dependencies, Flight requires: +* gRPC (>= 1.14, roughly) +* Protobuf (>= 3.6, earlier versions may work) +* c-ares (used by gRPC) + +By default, Arrow will try to download and build these dependencies +when building Flight. + +The optional `flight` libraries and tests can be built by passing +`-DARROW_FLIGHT=ON`. + +```shell +cmake .. -DARROW_FLIGHT=ON -DARROW_BUILD_TESTS=ON +make +``` + +You can also use existing installations of the extra dependencies. +When building, set the environment variables `GRPC_HOME` and/or +`PROTOBUF_HOME` and/or `CARES_HOME`. + +You may try using system libraries for gRPC and Protobuf, but these +are likely to be too old. + +On Ubuntu/Debian, you can try: + +```shell +sudo apt-get install libgrpc-dev libgrpc++-dev protobuf-compiler-grpc libc-ares-dev +``` + +Note that the version of gRPC in Ubuntu 18.10 is too old; you will +have to install gRPC from source. (Ubuntu 19.04/Debian Sid may work.) + +On macOS, you can try [Homebrew][1]: + +```shell +brew install grpc +``` + +You can also install gRPC from source. In this case, you must install +gRPC to generate the necessary files for CMake to find gRPC: + +```shell +cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DgRPC_PROTOBUF_PROVIDER=package -DgRPC_ZLIB_PROVIDER=package -DgRPC_CARES_PROVIDER=package -DgRPC_SSL_PROVIDER=package +``` + +You can then specify `-DgRPC_DIR` to `cmake`. + ### API documentation To generate the (html) API documentation, run the following command in the apidoc @@ -271,9 +375,13 @@ This requires [Doxygen](http://www.doxygen.org) to be installed. ## Development -This project follows [Google's C++ Style Guide][3] with minor exceptions. We do -not encourage anonymous namespaces and we relax the line length restriction to -90 characters. +This project follows [Google's C++ Style Guide][3] with minor exceptions: + + * We relax the line length restriction to 90 characters. + * We use the NULLPTR macro defined in `src/arrow/util/macros.h` to + support building C++/CLI (ARROW-1134) + * We use doxygen style comments ("///") instead of line comments ("//") + in header files. ### Memory Pools @@ -283,6 +391,12 @@ which use the default pool without explicitly passing it. You can disable these constructors in your application (so that you are accounting properly for all memory allocations) by defining `ARROW_NO_DEFAULT_MEMORY_POOL`. +### Header files + +We use the `.h` extension for C++ header files. Any header file name not +containing `internal` is considered to be a public header, and will be +automatically installed by the build. + ### Error Handling and Exceptions For error handling, we use `arrow::Status` values instead of throwing C++ @@ -376,6 +490,12 @@ You may find the required packages at http://releases.llvm.org/download.html or use the Debian/Ubuntu APT repositories on https://apt.llvm.org/. On macOS with [Homebrew][1] you can get it via `brew install llvm@6`. +Depending on how you installed clang-format, the build system may not be able +to find it. You can provide an explicit path to your LLVM installation (or the +root path for the clang tools) with the environment variable +`$CLANG_TOOLS_PATH` or by passing `-DClangTools_PATH=$PATH_TO_CLANG_TOOLS` when +invoking CMake. + ## Checking for ABI and API stability To build ABI compliance reports, you need to install the two tools @@ -429,6 +549,14 @@ both of these options would be used rarely. 
Current known uses-cases when they a * Parameterized tests in google test. +## CMake version requirements + +We support CMake 3.2 and higher. Some features require a newer version of CMake: + +* Building the benchmarks requires 3.6 or higher +* Building zstd from source requires 3.7 or higher +* Building Gandiva JNI bindings requires 3.11 or higher + [1]: https://brew.sh/ [2]: https://github.com/apache/arrow/blob/master/cpp/apidoc/Windows.md [3]: https://google.github.io/styleguide/cppguide.html diff --git a/cpp/apidoc/Doxyfile b/cpp/apidoc/Doxyfile index 3ec9af9262622..38ce17fb810cc 100644 --- a/cpp/apidoc/Doxyfile +++ b/cpp/apidoc/Doxyfile @@ -518,7 +518,7 @@ HIDE_UNDOC_CLASSES = NO # included in the documentation. # The default value is: NO. -HIDE_FRIEND_COMPOUNDS = NO +HIDE_FRIEND_COMPOUNDS = YES # If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any # documentation blocks found inside the body of a function. If set to NO, these @@ -741,7 +741,7 @@ CITE_BIB_FILES = # messages are off. # The default value is: NO. -QUIET = NO +QUIET = YES # The WARNINGS tag can be used to turn on/off the warning messages that are # generated to standard error (stderr) by doxygen. If WARNINGS is set to YES @@ -779,7 +779,7 @@ WARN_NO_PARAMDOC = NO # a warning is encountered. # The default value is: NO. -WARN_AS_ERROR = NO +WARN_AS_ERROR = YES # The WARN_FORMAT tag determines the format of the warning messages that doxygen # can produce. The string should contain the $file, $line, and $text tags, which @@ -858,7 +858,7 @@ RECURSIVE = YES # Note that relative paths are relative to the directory from which doxygen is # run. -EXCLUDE = +EXCLUDE = ../src/arrow/vendored # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded @@ -1919,7 +1919,7 @@ MAN_LINKS = NO # captures the structure of the code including all documentation. # The default value is: NO. -GENERATE_XML = NO +GENERATE_XML = YES # The XML_OUTPUT tag is used to specify where the XML pages will be put. If a # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of @@ -2075,7 +2075,8 @@ INCLUDE_FILE_PATTERNS = PREDEFINED = __attribute__(x)= \ __declspec(x)= \ ARROW_EXPORT= \ - ARROW_EXTERN_TEMPLATE= + ARROW_EXTERN_TEMPLATE= \ + ARROW_DEPRECATED(x)= # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. 
The diff --git a/cpp/apidoc/Windows.md b/cpp/apidoc/Windows.md index 5199c2fdbfa59..8a724d0342be7 100644 --- a/cpp/apidoc/Windows.md +++ b/cpp/apidoc/Windows.md @@ -38,10 +38,11 @@ Launch cmd.exe and run following commands: conda config --add channels conda-forge ``` -Now, you can bootstrap a build environment +Now, you can bootstrap a build environment (call from the root directory of the +Arrow codebase): ```shell -conda create -n arrow-dev cmake git boost-cpp flatbuffers rapidjson cmake thrift-cpp snappy zlib brotli gflags lz4-c zstd -c conda-forge +conda create -n arrow-dev --file=ci\conda_env_cpp.yml ``` > **Note:** Make sure to get the `conda-forge` build of `gflags` as the diff --git a/cpp/apidoc/index.md b/cpp/apidoc/index.md index 46ee5003678dd..076c29726b994 100644 --- a/cpp/apidoc/index.md +++ b/cpp/apidoc/index.md @@ -38,63 +38,5 @@ Table of Contents * Instructions on how to build Arrow C++ on [Windows](Windows.md) * How to access [HDFS](HDFS.md) * Tutorials - * [Convert a vector of row-wise data into an Arrow table](tutorials/row_wise_conversion.md) * [Using the Plasma In-Memory Object Store](tutorials/plasma.md) * [Use Plasma to Access Tensors from C++ in Python](tutorials/tensor_to_py.md) - -Getting Started ---------------- - -The most basic structure in Arrow is an `arrow::Array`. It holds a sequence -of values with known length all having the same type. It consists of the data -itself and an additional bitmap that indicates if the corresponding entry of -array is a null-value. Note that for array with zero null entries, we can omit -this bitmap. - -As Arrow objects are immutable, there are classes provided that should help you -build these objects. To build an array of `int64_t` elements, we can use the -`arrow::Int64Builder`. In the following example, we build an array of the range -1 to 8 where the element that should hold the number 4 is nulled. - - Int64Builder builder; - builder.Append(1); - builder.Append(2); - builder.Append(3); - builder.AppendNull(); - builder.Append(5); - builder.Append(6); - builder.Append(7); - builder.Append(8); - - std::shared_ptr array; - builder.Finish(&array); - -The resulting Array (which can be casted to `arrow::Int64Array` if you want -to access its values) then consists of two `arrow::Buffer`. The first one is -the null bitmap holding a single byte with the bits `0|0|0|0|1|0|0|0`. -As we use [least-significant bit (LSB) numbering](https://en.wikipedia.org/wiki/Bit_numbering) -this indicates that the fourth entry in the array is null. The second -buffer is simply an `int64_t` array containing all the above values. -As the fourth entry is null, the value at that position in the buffer is -undefined. - - // Cast the Array to its actual type to access its data - std::shared_ptr int64_array = std::static_pointer_cast(array); - - // Get the pointer to the null bitmap. - const uint8_t* null_bitmap = int64_array->null_bitmap_data(); - - // Get the pointer to the actual data - const int64_t* data = int64_array->raw_values(); - -In the above example, we have yet skipped explaining two things in the code. -On constructing the builder, we have passed `arrow::int64()` to it. This is -the type information with which the resulting array will be annotated. In -this simple form, it is solely a `std::shared_ptr` -instantiation. - -Furthermore, we have passed `arrow::default_memory_pool()` to the constructor. -This `arrow::MemoryPool` is used for the allocations of heap memory. 
Besides -tracking the amount of memory allocated, the allocator also ensures that the -allocated memory regions are 64-byte aligned (as required by the Arrow -specification). diff --git a/cpp/apidoc/tutorials/plasma.md b/cpp/apidoc/tutorials/plasma.md index 472d479c4b2f9..40c5a10603e71 100644 --- a/cpp/apidoc/tutorials/plasma.md +++ b/cpp/apidoc/tutorials/plasma.md @@ -80,7 +80,7 @@ using namespace plasma; int main(int argc, char** argv) { // Start up and connect a Plasma client. PlasmaClient client; - ARROW_CHECK_OK(client.Connect("/tmp/plasma", "")); + ARROW_CHECK_OK(client.Connect("/tmp/plasma")); // Disconnect the Plasma client. ARROW_CHECK_OK(client.Disconnect()); } @@ -182,7 +182,7 @@ was written by the `Create` command. int64_t data_size = 100; // The address of the buffer allocated by the Plasma store will be written at // this address. -uint8_t* data; +std::shared_ptr<Buffer> data; // Create a Plasma object by specifying its ID and size. ARROW_CHECK_OK(client.Create(object_id, data_size, NULL, 0, &data)); ``` @@ -194,7 +194,7 @@ metadata (as raw bytes) and the fourth argument is the size of the metadata. // Create a Plasma object with metadata. int64_t data_size = 100; std::string metadata = "{'author': 'john'}"; -uint8_t* data; +std::shared_ptr<Buffer> data; client.Create(object_id, data_size, (uint8_t*) metadata.data(), metadata.size(), &data); ``` @@ -226,7 +226,7 @@ using namespace plasma; int main(int argc, char** argv) { // Start up and connect a Plasma client. PlasmaClient client; - ARROW_CHECK_OK(client.Connect("/tmp/plasma", "")); + ARROW_CHECK_OK(client.Connect("/tmp/plasma")); // Create an object with a fixed ObjectID. ObjectID object_id = ObjectID::from_binary("00000000000000000000"); int64_t data_size = 1000; @@ -332,7 +332,7 @@ using namespace plasma; int main(int argc, char** argv) { // Start up and connect a Plasma client. PlasmaClient client; - ARROW_CHECK_OK(client.Connect("/tmp/plasma", "")); + ARROW_CHECK_OK(client.Connect("/tmp/plasma")); ObjectID object_id = ObjectID::from_binary("00000000000000000000"); ObjectBuffer object_buffer; ARROW_CHECK_OK(client.Get(&object_id, 1, -1, &object_buffer)); @@ -421,7 +421,7 @@ using namespace plasma; int main(int argc, char** argv) { // Start up and connect a Plasma client. PlasmaClient client; - ARROW_CHECK_OK(client.Connect("/tmp/plasma", "")); + ARROW_CHECK_OK(client.Connect("/tmp/plasma")); int fd; ARROW_CHECK_OK(client.Subscribe(&fd)); diff --git a/cpp/apidoc/tutorials/row_wise_conversion.md b/cpp/apidoc/tutorials/row_wise_conversion.md deleted file mode 100644 index 750a923c7846b..0000000000000 --- a/cpp/apidoc/tutorials/row_wise_conversion.md +++ /dev/null @@ -1,194 +0,0 @@ - - -Convert a vector of row-wise data into an Arrow table -===================================================== - -While we want to use columnar data structures to build efficient operations, we -often receive data in a row-wise fashion from other systems. In the following, -we want give a brief introduction into the classes provided by Apache Arrow by -showing how to transform row-wise data into a columnar table. - -The data in this example is stored in the following struct: - -``` -struct data_row { - int64_t id; - double cost; - std::vector<double> cost_components; -}; - -std::vector<data_row> rows; -``` - -The final representation should be an `arrow::Table` which in turn is made up of -an `arrow::Schema` and a list of `arrow::Column`. An `arrow::Column` is again a -named collection of one or more `arrow::Array` instances.
As the first step, we -will iterate over the data and build up the arrays incrementally. For this task, -we provide `arrow::ArrayBuilder` classes that help in the construction of the -final `arrow::Array` instances. - -For each type, Arrow has a specially typed builder class. For the primitive -values `id` and `cost` we can use the respective `arrow::Int64Builder` and -`arrow::DoubleBuilder`. For the `cost_components` vector, we need to have two -builders, a top-level `arrow::ListBuilder` that builds the array of offsets and -a nested `arrow::DoubleBuilder` that constructs the underlying values array that -is referenced by the offsets in the former array. - -``` -// The builders are more efficient using -// arrow::jemalloc::MemoryPool::default_pool() as this can increase the size of -// the underlying memory regions in-place. At the moment, arrow::jemalloc is only -// supported on Unix systems, not Windows. - -using arrow::DoubleBuilder; -using arrow::Int64Builder; -using arrow::ListBuilder; - -arrow::MemoryPool* pool = arrow::default_memory_pool(); -Int64Builder id_builder(pool); -DoubleBuilder cost_builder(pool); -std::unique_ptr<DoubleBuilder> components_values_builder(new DoubleBuilder(pool)); -ListBuilder components_builder(pool, std::move(components_values_builder)); -``` - -Now we can loop over our existing data and insert it into the builders. The -`Append` calls here may fail (e.g. we cannot allocate enough additional memory). -Thus we need to check their return values. For more information on these values, -check the documentation about `arrow::Status`. - -``` -for (const data_row& row : rows) { - ARROW_RETURN_NOT_OK(id_builder.Append(row.id)); - ARROW_RETURN_NOT_OK(cost_builder.Append(row.cost)); - - // Indicate the start of a new list row. This will memorise the current - // offset in the values builder. - ARROW_RETURN_NOT_OK(components_builder.Append()); - // Store the actual values. The final nullptr argument tells the underyling - // builder that all added values are valid, i.e. non-null. - ARROW_RETURN_NOT_OK(components_values_builder->Append( - row.cost_components.data(), row.cost_components.size(), - nullptr); -} -``` - -At the end, we finalise the arrays, declare the (type) schema and combine them - into a single `arrow::Table`: - -``` -std::shared_ptr<arrow::Array> id_array; -ARROW_RETURN_NOT_OK(id_builder.Finish(&id_array)); -std::shared_ptr<arrow::Array> cost_array; -ARROW_RETURN_NOT_OK(cost_builder.Finish(&cost_array)); -std::shared_ptr<arrow::Array> cost_components_array; -ARROW_RETURN_NOT_OK(components_builder.Finish(&cost_components_array)); - -std::vector<std::shared_ptr<arrow::Field>> schema_vector = { - arrow::field("id", arrow::int64()), - arrow::field("cost", arrow::float64()), - arrow::field("cost_components", arrow::list(arrow::float64())) -}; -auto schema = std::make_shared<arrow::Schema>(schema_vector); - -std::shared_ptr<arrow::Table> table = arrow::Table::Make(schema, - {id_array, cost_array, cost_components_array}); -``` - -The final `table` variable is the one we then can pass on to other functions -that can consume Apache Arrow memory structures. This object has ownership of -all referenced data, thus we don't have to care about undefined references once -we leave the scope of the function building the table and its underlying arrays.
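For contrast with the C++ builder walkthrough in the tutorial removed above, the same row-wise-to-columnar conversion takes only a few lines in Python. A rough sketch, assuming `pyarrow` is installed and using made-up sample rows:

```python
import pyarrow as pa

# Sample rows mirroring the data_row struct above: (id, cost, cost_components)
rows = [(1, 1.0, [0.1, 0.9]),
        (2, 2.5, [2.5]),
        (3, 0.5, [0.25, 0.25])]
ids, costs, components = zip(*rows)

# Build one typed array per column, then assemble the table
table = pa.Table.from_arrays(
    [pa.array(ids, type=pa.int64()),
     pa.array(costs, type=pa.float64()),
     pa.array(components, type=pa.list_(pa.float64()))],
    names=["id", "cost", "cost_components"])
print(table.schema)
```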
- - - Converting an Arrow Table back into row-wise representation =========================================================== - -To convert an Arrow table back into the same row-wise representation as in the -above section, we first will check that the table conforms to our expected -schema and then will build up the vector of rows incrementally. - -For the check if the table is as expected, we can utilise solely its schema. - -``` -// This is our input that was passed in from the outside. -std::shared_ptr<arrow::Table> table; - -std::vector<std::shared_ptr<arrow::Field>> schema_vector = { - arrow::field("id", arrow::int64()), - arrow::field("cost", arrow::float64()), - arrow::field("cost_components", arrow::list(arrow::float64())) -}; -auto expected_schema = std::make_shared<arrow::Schema>(schema_vector); - -if (!expected_schema->Equals(*table->schema())) { - // The table doesn't have the expected schema thus we cannot directly - // convert it to our target representation. - // TODO: Implement your custom error handling logic here. -} -``` - -As we have ensured that the table has the expected structure, we can unpack the -underlying arrays. For the primitive columns `id` and `cost` we can use the high -level functions to get the values whereas for the nested column -`cost_components` we need to access the C-pointer to the data to copy its -contents into the resulting `std::vector<double>`. Here we need to be care to -also add the offset to the pointer. This offset is needed to enable zero-copy -slicing operations. While this could be adjusted automatically for double -arrays, this cannot be done for the accompanying bitmap as often the slicing -border would be inside a byte. - -``` -// For simplicity, we assume that all arrays consist of a single chunk here. -// In a productive implementation this should either be explicitly check or code -added that can treat chunked arrays. - -auto ids = std::static_pointer_cast<arrow::Int64Array>( - table->column(0)->data()->chunk(0)); -auto costs = std::static_pointer_cast<arrow::DoubleArray>( - table->column(1)->data()->chunk(0)); -auto cost_components = std::static_pointer_cast<arrow::ListArray>( - table->column(2)->data()->chunk(0)); -auto cost_components_values = std::static_pointer_cast<arrow::DoubleArray>( - cost_components->values()); -// To enable zero-copy slices, the native values pointer might need to account -// for this slicing offset. This is not needed for the higher level functions -// like Value(…) that already account for this offset internally. -const double* cost_components_values_ptr = cost_components_values->data() - + cost_components_values->offset(); -``` - -After we have unpacked the arrays from the table, we can iterate over them in a -row-wise fashion and fill our target, row-wise representation. - -``` -std::vector<data_row> rows; - -for (int64_t i = 0; i < table->num_rows(); i++) { - // Another simplification in this example is that we assume that there are - // no null entries, e.g. each row is fill with valid values.
- int64_t id = ids->Value(i); - double cost = costs->Value(i); - const double* first = cost_components_values_ptr + cost_components->value_offset(i); - const double* last = cost_components_values_ptr + cost_components->value_offset(i + 1); - std::vector<double> components_vec(first, last); - rows.push_back({id, cost, components_vec}); -} -``` diff --git a/cpp/apidoc/tutorials/tensor_to_py.md b/cpp/apidoc/tutorials/tensor_to_py.md index 0be973a4f3df9..cd191fea07d09 100644 --- a/cpp/apidoc/tutorials/tensor_to_py.md +++ b/cpp/apidoc/tutorials/tensor_to_py.md @@ -105,7 +105,7 @@ The `inputs` variable will be a list of Object IDs in their raw byte string form import pyarrow as pa import pyarrow.plasma as plasma -plasma_client = plasma.connect('/tmp/plasma', '', 0) +plasma_client = plasma.connect('/tmp/plasma') # inputs: a list of object ids inputs = [20 * b'1'] diff --git a/cpp/build-support/build-lz4-lib.sh b/cpp/build-support/build-lz4-lib.sh index d33686655a8ac..fa4c61b48d4a7 100755 --- a/cpp/build-support/build-lz4-lib.sh +++ b/cpp/build-support/build-lz4-lib.sh @@ -19,7 +19,7 @@ # export CFLAGS="${CFLAGS} -O3 -fPIC" if [ -z "$MAKELEVEL" ]; then - make -j4 + make -j4 "$@" else - make + make "$@" fi diff --git a/cpp/build-support/iwyu/mappings/arrow-misc.imp b/cpp/build-support/iwyu/mappings/arrow-misc.imp index 8bb65e62d98e3..7ff99108c5aff 100644 --- a/cpp/build-support/iwyu/mappings/arrow-misc.imp +++ b/cpp/build-support/iwyu/mappings/arrow-misc.imp @@ -49,7 +49,7 @@ { symbol: ["shared_ptr", private, "<memory>", public ] }, { symbol: ["_Node_const_iterator", private, "<unordered_map>", public ] }, { symbol: ["unordered_map<>::mapped_type", private, "<unordered_map>", public ] }, - { symbol: ["move", private, "<utility>", public ] }, + { symbol: ["std::move", private, "<utility>", public ] }, { symbol: ["pair", private, "<utility>", public ] }, { symbol: ["errno", private, "<cerrno>", public ] }, { symbol: ["posix_memalign", private, "<cstdlib>", public ] } diff --git a/cpp/build-support/lint_cpp_cli.py b/cpp/build-support/lint_cpp_cli.py index 4c26927740dbb..ab2de5901a4df 100644 --- a/cpp/build-support/lint_cpp_cli.py +++ b/cpp/build-support/lint_cpp_cli.py @@ -19,8 +19,6 @@ import argparse import re import os -import sys -import traceback parser = argparse.ArgumentParser( description="Check for illegal headers for C++/CLI applications") @@ -34,6 +32,10 @@ _RETURN_NOT_OK_REGEX = re.compile(r'.*\sRETURN_NOT_OK.*') +def _paths(paths): + return [p.strip().replace('/', os.path.sep) for p in paths.splitlines()] + + def _strip_comments(line): m = _STRIP_COMMENT_REGEX.match(line) if not m: @@ -48,11 +50,11 @@ def lint_file(path): (lambda x: '<mutex>' in x, 'Uses <mutex>', []), (lambda x: re.match(_NULLPTR_REGEX, x), 'Uses nullptr', []), (lambda x: re.match(_RETURN_NOT_OK_REGEX, x), - 'Use ARROW_RETURN_NOT_OK in header files', - ['arrow/status.h', - 'test', - 'arrow/util/hash.h', - 'arrow/python/util']) + 'Use ARROW_RETURN_NOT_OK in header files', _paths('''\ + arrow/status.h + test + arrow/util/hash.h + arrow/python/util''')) ] with open(path) as f: @@ -63,28 +65,23 @@ def lint_file(path): continue if rule(stripped_line): - raise Exception('File {0} failed C++/CLI lint check: {1}\n' - 'Line {2}: {3}' - .format(path, why, i + 1, line)) - - -EXCLUSIONS = [ - 'arrow/python/iterators.h', - 'arrow/util/date.h', - 'arrow/util/hashing.h', - 'arrow/util/macros.h', - 'arrow/util/parallel.h', - 'arrow/util/string_view/string_view.hpp', - 'arrow/util/xxhash/xxhash.c', - 'arrow/util/xxhash/xxhash.h', - 'arrow/visitor_inline.h', - 'gandiva/cache.h', - 'gandiva/jni', - 'test', - 'internal' -] - -try: + yield
path, why, i, line + + +EXCLUSIONS = _paths('''\ + arrow/python/iterators.h + arrow/util/hashing.h + arrow/util/macros.h + arrow/util/parallel.h + arrow/vendored + arrow/visitor_inline.h + gandiva/cache.h + gandiva/jni + test + internal''') + + +def lint_files(): for dirpath, _, filenames in os.walk(arguments.source_path): for filename in filenames: full_path = os.path.join(dirpath, filename) @@ -100,7 +97,13 @@ def lint_file(path): # Only run on header files if filename.endswith('.h'): - lint_file(full_path) -except Exception: - traceback.print_exc() - sys.exit(1) + yield from lint_file(full_path) + + +if __name__ == '__main__': + failures = list(lint_files()) + for path, why, i, line in failures: + print('File {0} failed C++/CLI lint check: {1}\n' + 'Line {2}: {3}'.format(path, why, i + 1, line)) + if failures: + exit(1) diff --git a/cpp/build-support/clang_format_exclusions.txt b/cpp/build-support/lint_exclusions.txt similarity index 61% rename from cpp/build-support/clang_format_exclusions.txt rename to cpp/build-support/lint_exclusions.txt index c04523af1db81..2964898f4f24d 100644 --- a/cpp/build-support/clang_format_exclusions.txt +++ b/cpp/build-support/lint_exclusions.txt @@ -4,11 +4,6 @@ *pyarrow_lib.h *python/config.h *python/platform.h -*util/date.h -*util/string_view/* -*util/variant.h -*util/variant/* *thirdparty/ae/* -*xxhash.cc -*xxhash.h +*vendored/* *RcppExports.cpp* diff --git a/cpp/build-support/lintutils.py b/cpp/build-support/lintutils.py new file mode 100644 index 0000000000000..012d42bd696a2 --- /dev/null +++ b/cpp/build-support/lintutils.py @@ -0,0 +1,107 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import multiprocessing as mp +import os +from fnmatch import fnmatch +from subprocess import Popen + + +def chunk(seq, n): + """ + divide a sequence into equal sized chunks + (the last chunk may be smaller, but won't be empty) + """ + chunks = [] + some = [] + for element in seq: + if len(some) == n: + chunks.append(some) + some = [] + some.append(element) + if len(some) > 0: + chunks.append(some) + return chunks + + +def dechunk(chunks): + "flatten chunks into a single list" + seq = [] + for chunk in chunks: + seq.extend(chunk) + return seq + + +def run_parallel(cmds, **kwargs): + """ + Run each of cmds (with shared **kwargs) using subprocess.Popen + then wait for all of them to complete. 
+ Runs batches of multiprocessing.cpu_count() * 2 from cmds + returns a list of tuples containing each process's + returncode, stdout, stderr + """ + complete = [] + for cmds_batch in chunk(cmds, mp.cpu_count() * 2): + procs_batch = [Popen(cmd, **kwargs) for cmd in cmds_batch] + for proc in procs_batch: + stdout, stderr = proc.communicate() + complete.append((proc.returncode, stdout, stderr)) + return complete + + +_source_extensions = ''' +.h +.cc +'''.split() + + +def get_sources(source_dir, exclude_globs=[]): + sources = [] + for directory, subdirs, basenames in os.walk(source_dir): + for path in [os.path.join(directory, basename) for basename in basenames]: + # filter out non-source files + if os.path.splitext(path)[1] not in _source_extensions: + continue + + path = os.path.abspath(path) + + # filter out files that match the globs in the globs file + if any([fnmatch(path, glob) for glob in exclude_globs]): + continue + + sources.append(path) + return sources + + +def stdout_pathcolonline(completed_process, filenames): + """ + given a completed process which may have reported some files as problematic + by printing the path name followed by ':' then a line number, examine + stdout and return the set of actually reported file names + """ + returncode, stdout, stderr = completed_process + bfilenames = set() + for filename in filenames: + bfilenames.add(filename.encode('utf-8') + b':') + problem_files = set() + for line in stdout.splitlines(): + for filename in bfilenames: + if line.startswith(filename): + problem_files.add(filename.decode('utf-8')) + bfilenames.remove(filename) + break + return problem_files, stdout diff --git a/cpp/build-support/run-clang-tidy.sh b/cpp/build-support/run-clang-tidy.sh deleted file mode 100755 index 75e9458e257ca..0000000000000 --- a/cpp/build-support/run-clang-tidy.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -# -# Runs clang format in the given directory -# Arguments: -# $1 - Path to the clang tidy binary -# $2 - Path to the compile_commands.json to use -# $3 - Apply fixes (will raise an error if false and not there where changes) -# $ARGN - Files to run clang-tidy on -# -CLANG_TIDY=$1 -shift -COMPILE_COMMANDS=$1 -shift -APPLY_FIXES=$1 -shift - -# clang format will only find its configuration if we are in -# the source tree or in a path relative to the source tree -if [ "$APPLY_FIXES" == "1" ]; then - $CLANG_TIDY -p $COMPILE_COMMANDS -fix $@ -else - NUM_CORRECTIONS=`$CLANG_TIDY -p $COMPILE_COMMANDS $@ 2>&1 | grep -v Skipping | grep "warnings* generated" | wc -l` - if [ "$NUM_CORRECTIONS" -gt "0" ]; then - echo "clang-tidy had suggested fixes. Please fix these!!!" - exit 1 -fi
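The `chunk` and `run_parallel` helpers added in `lintutils` above are the backbone of the rewritten lint drivers below. A minimal usage sketch; the file names and the use of `echo` as a stand-in lint tool are illustrative only:

```python
import lintutils
from subprocess import PIPE

filenames = ["a.cc", "b.cc", "c.cc", "d.cc", "e.cc"]

# One command per batch of two files, echoing instead of linting
cmds = [["echo"] + batch for batch in lintutils.chunk(filenames, 2)]

# run_parallel returns one (returncode, stdout, stderr) tuple per command
for returncode, stdout, stderr in lintutils.run_parallel(
        cmds, stdout=PIPE, stderr=PIPE):
    if returncode == 0:
        print(stdout.decode().strip())
```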
diff --git a/cpp/build-support/run-test.sh b/cpp/build-support/run-test.sh index 656ab7bd3b805..6b1c09efb4d8d 100755 --- a/cpp/build-support/run-test.sh +++ b/cpp/build-support/run-test.sh @@ -80,6 +80,10 @@ function setup_sanitizers() { TSAN_OPTIONS="$TSAN_OPTIONS history_size=7" export TSAN_OPTIONS + UBSAN_OPTIONS="$UBSAN_OPTIONS print_stacktrace=1" + UBSAN_OPTIONS="$UBSAN_OPTIONS suppressions=$ROOT/build-support/ubsan-suppressions.txt" + export UBSAN_OPTIONS + # Enable leak detection even under LLVM 3.4, where it was disabled by default. # This flag only takes effect when running an ASAN build. # ASAN_OPTIONS="$ASAN_OPTIONS detect_leaks=1" diff --git a/cpp/build-support/run_clang_format.py b/cpp/build-support/run_clang_format.py index 24dcabb8c7169..1d1592d233ea7 100755 --- a/cpp/build-support/run_clang_format.py +++ b/cpp/build-support/run_clang_format.py @@ -16,74 +16,53 @@ # specific language governing permissions and limitations # under the License. +from __future__ import print_function +import lintutils +from subprocess import PIPE import argparse import difflib -import fnmatch import multiprocessing as mp -import os -import subprocess import sys +from functools import partial -class FileChecker(object): +# examine the output of clang-format and if changes are +# present assemble a (unified)patch of the difference +def _check_one_file(completed_processes, filename): + with open(filename, "rb") as reader: + original = reader.read() - def __init__(self, arguments): - self.quiet = arguments.quiet - self.clang_format_binary = arguments.clang_format_binary - - def run(self, filename): - if not self.quiet: - print("Checking {}".format(filename)) - # - # Due to some incompatibilities between Python 2 and - # Python 3, there are some specific actions we take here - # to make sure the difflib.unified_diff call works. - # - # In Python 2, the call to subprocess.check_output return - # a 'str' type. In Python 3, however, the call returns a - # 'bytes' type unless the 'encoding' argument is - # specified. Unfortunately, the 'encoding' argument is not - # in the Python 2 API. We could do an if/else here based - # on the version of Python we are running, but it's more - # straightforward to read the file in binary and do utf-8 - # conversion. In Python 2, it's just converting string - # types to unicode types, whereas in Python 3 it's - # converting bytes types to utf-8 encoded str types. This - # approach ensures that the arguments to - # difflib.unified_diff are acceptable string types in both - # Python 2 and Python 3.
- with open(filename, "rb") as reader: - original = reader.read().decode('utf8') + returncode, stdout, stderr = completed_processes[filename] + formatted = stdout + if formatted != original: + # Run the equivalent of diff -u + diff = list(difflib.unified_diff( + original.decode('utf8').splitlines(True), + formatted.decode('utf8').splitlines(True), + fromfile=filename, + tofile="{} (after clang format)".format( + filename))) + else: + diff = None - # Run clang-format and capture its output - formatted = subprocess.check_output( - [self.clang_format_binary, - filename]) - formatted = formatted.decode('utf8') - if formatted != original: - # Run the equivalent of diff -u - diff = list(difflib.unified_diff( - original.splitlines(True), - formatted.splitlines(True), - fromfile=filename, - tofile="{} (after clang format)".format( - filename))) - if diff: - return filename, diff + return filename, diff if __name__ == "__main__": parser = argparse.ArgumentParser( - description="Runs clang format on all of the source " - "files. If --fix is specified, and compares the output " - "with the existing file, outputting a unifiied diff if " - "there are any necessary changes") - parser.add_argument("clang_format_binary", + description="Runs clang-format on all of the source " + "files. If --fix is specified enforce format by " + "modifying in place, otherwise compare the output " + "with the existing file and output any necessary " + "changes as a patch in unified diff format") + parser.add_argument("--clang_format_binary", + required=True, help="Path to the clang-format binary") - parser.add_argument("exclude_globs", + parser.add_argument("--exclude_globs", help="Filename containing globs for files " "that should be excluded from the checks") - parser.add_argument("source_dir", + parser.add_argument("--source_dir", + required=True, help="Root directory of the source code") parser.add_argument("--fix", default=False, action="store_true", @@ -93,47 +72,67 @@ def run(self, filename): parser.add_argument("--quiet", default=False, action="store_true", help="If specified, only print errors") - arguments = parser.parse_args() + exclude_globs = [] + if arguments.exclude_globs: + for line in open(arguments.exclude_globs): + exclude_globs.append(line.strip()) + formatted_filenames = [] - exclude_globs = [line.strip() for line in open(arguments.exclude_globs)] - for directory, subdirs, filenames in os.walk(arguments.source_dir): - fullpaths = (os.path.join(directory, filename) - for filename in filenames) - source_files = [x for x in fullpaths - if x.endswith(".h") or - x.endswith(".cc") or - x.endswith(".cpp")] - formatted_filenames.extend( - # Filter out files that match the globs in the globs file - [filename for filename in source_files - if not any((fnmatch.fnmatch(filename, exclude_glob) - for exclude_glob in exclude_globs))]) + for path in lintutils.get_sources(arguments.source_dir, exclude_globs): + formatted_filenames.append(str(path)) - error = False if arguments.fix: if not arguments.quiet: - # Print out each file on its own line, but run - # clang format once for all of the files print("\n".join(map(lambda x: "Formatting {}".format(x), formatted_filenames))) - subprocess.check_call([arguments.clang_format_binary, - "-i"] + formatted_filenames) + + # Break clang-format invocations into chunks: each invocation formats + # 16 files. 
Wait for all processes to complete + results = lintutils.run_parallel([ + [arguments.clang_format_binary, "-i"] + some + for some in lintutils.chunk(formatted_filenames, 16) + ]) + for returncode, stdout, stderr in results: + # if any clang-format reported a parse error, bubble it + if returncode != 0: + sys.exit(returncode) + else: - checker = FileChecker(arguments) + # run an instance of clang-format for each source file in parallel, + # then wait for all processes to complete + results = lintutils.run_parallel([ + [arguments.clang_format_binary, filename] + for filename in formatted_filenames + ], stdout=PIPE, stderr=PIPE) + for returncode, stdout, stderr in results: + # if any clang-format reported a parse error, bubble it + if returncode != 0: + sys.exit(returncode) + + error = False + checker = partial(_check_one_file, { + filename: result + for filename, result in zip(formatted_filenames, results) + }) pool = mp.Pool() try: - for res in pool.imap(checker.run, formatted_filenames): - if res is not None: - filename, diff = res + # check the output from each invocation of clang-format in parallel + for filename, diff in pool.imap(checker, formatted_filenames): + if not arguments.quiet: + print("Checking {}".format(filename)) + if diff: print("{} had clang-format style issues".format(filename)) # Print out the diff to stderr error = True + # pad with a newline + print(file=sys.stderr) sys.stderr.writelines(diff) + except Exception: + error = True + raise finally: pool.terminate() pool.join() - - - sys.exit(1 if error else 0) + sys.exit(1 if error else 0) diff --git a/cpp/build-support/run_clang_tidy.py b/cpp/build-support/run_clang_tidy.py new file mode 100755 index 0000000000000..57a3e91bd1c15 --- /dev/null +++ b/cpp/build-support/run_clang_tidy.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from __future__ import print_function +import argparse +import multiprocessing as mp +import lintutils +from subprocess import PIPE +import sys +from functools import partial + + +def _get_chunk_key(filenames): + # lists are not hashable so key on the first filename in a chunk + return filenames[0] + + +# clang-tidy outputs complaints in '/path:line_number: complaint' format, +# so we can scan its output to get a list of files to fix +def _check_some_files(completed_processes, filenames): + result = completed_processes[_get_chunk_key(filenames)] + return lintutils.stdout_pathcolonline(result, filenames) + + +def _check_all(cmd, filenames): + # each clang-tidy instance will process 16 files + chunks = lintutils.chunk(filenames, 16) + cmds = [cmd + some for some in chunks] + results = lintutils.run_parallel(cmds, stderr=PIPE, stdout=PIPE) + error = False + # record completed processes (keyed by the first filename in the input + # chunk) for lookup in _check_some_files + completed_processes = { + _get_chunk_key(some): result + for some, result in zip(chunks, results) + } + checker = partial(_check_some_files, completed_processes) + pool = mp.Pool() + try: + # check output of completed clang-tidy invocations in parallel + for problem_files, stdout in pool.imap(checker, chunks): + if problem_files: + msg = "clang-tidy suggested fixes for {}" + print("\n".join(map(msg.format, problem_files))) + error = True + except Exception: + error = True + raise + finally: + pool.terminate() + pool.join() + + if error: + sys.exit(1) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Runs clang-tidy on all of the source files.") + parser.add_argument("--clang_tidy_binary", + required=True, + help="Path to the clang-tidy binary") + parser.add_argument("--exclude_globs", + help="Filename containing globs for files " + "that should be excluded from the checks") + parser.add_argument("--compile_commands", + required=True, + help="compile_commands.json to pass clang-tidy") + parser.add_argument("--source_dir", + required=True, + help="Root directory of the source code") + parser.add_argument("--fix", default=False, + action="store_true", + help="If specified, will attempt to fix the " + "source code instead of recommending fixes, " + "defaults to %(default)s") + parser.add_argument("--quiet", default=False, + action="store_true", + help="If specified, only print errors") + arguments = parser.parse_args() + + linted_filenames = [] + for path in lintutils.get_sources(arguments.source_dir): + linted_filenames.append(path) + + if not arguments.quiet: + msg = 'Tidying {}' if arguments.fix else 'Checking {}' + print("\n".join(map(msg.format, linted_filenames))) + + cmd = [ + arguments.clang_tidy_binary, + '-p', + arguments.compile_commands + ] + if arguments.fix: + cmd.append('-fix') + results = lintutils.run_parallel( + [cmd + some for some in lintutils.chunk(linted_filenames, 16)]) + for returncode, stdout, stderr in results: + # run_parallel returns plain (returncode, stdout, stderr) tuples, + # so check the exit codes directly + if returncode != 0: + sys.exit(returncode) + + else: + _check_all(cmd, linted_filenames) diff --git a/cpp/build-support/run_cpplint.py b/cpp/build-support/run_cpplint.py new file mode 100755 index 0000000000000..035a02edd1978 --- /dev/null +++ b/cpp/build-support/run_cpplint.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership.
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import print_function +import lintutils +from subprocess import PIPE, STDOUT +import argparse +import multiprocessing as mp +import sys +import platform +from functools import partial + + +_filters = ''' +-whitespace/comments +-readability/todo +-build/header_guard +-build/c++11 +-runtime/references +-build/include_order +'''.split() + + +def _get_chunk_key(filenames): + # lists are not hashable so key on the first filename in a chunk + return filenames[0] + + +def _check_some_files(completed_processes, filenames): + # cpplint outputs complaints in '/path:line_number: complaint' format, + # so we can scan its output to get a list of files to fix + result = completed_processes[_get_chunk_key(filenames)] + return lintutils.stdout_pathcolonline(result, filenames) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Runs cpplint on all of the source files.") + parser.add_argument("--cpplint_binary", + required=True, + help="Path to the cpplint binary") + parser.add_argument("--exclude_globs", + help="Filename containing globs for files " + "that should be excluded from the checks") + parser.add_argument("--source_dir", + required=True, + help="Root directory of the source code") + parser.add_argument("--quiet", default=False, + action="store_true", + help="If specified, only print errors") + arguments = parser.parse_args() + + exclude_globs = [] + if arguments.exclude_globs: + for line in open(arguments.exclude_globs): + exclude_globs.append(line.strip()) + + linted_filenames = [] + for path in lintutils.get_sources(arguments.source_dir, exclude_globs): + linted_filenames.append(str(path)) + + cmd = [ + arguments.cpplint_binary, + '--verbose=2', + '--linelength=90', + '--filter=' + ','.join(_filters) + ] + if (arguments.cpplint_binary.endswith('.py') and + platform.system() == 'Windows'): + # Windows doesn't support executable scripts; execute with + # sys.executable + cmd.insert(0, sys.executable) + if arguments.quiet: + cmd.append('--quiet') + else: + print("\n".join(map(lambda x: "Linting {}".format(x), + linted_filenames))) + + # lint files in chunks: each invocation of cpplint will process 16 files + chunks = lintutils.chunk(linted_filenames, 16) + cmds = [cmd + some for some in chunks] + results = lintutils.run_parallel(cmds, stdout=PIPE, stderr=STDOUT) + + error = False + # record completed processes (keyed by the first filename in the input + # chunk) for lookup in _check_some_files + completed_processes = { + _get_chunk_key(filenames): result + for filenames, result in zip(chunks, results) + } + checker = partial(_check_some_files, completed_processes) + pool = mp.Pool() + try: + # scan the outputs of various cpplint invocations in parallel to + # distill a list of problematic files + for problem_files, stdout in pool.imap(checker, chunks): + if problem_files: + msg = "{} had cpplint issues" + print("\n".join(map(msg.format, problem_files))) + print(stdout, 
file=sys.stderr) + error = True + except Exception: + error = True + raise + finally: + pool.terminate() + pool.join() + + sys.exit(1 if error else 0) diff --git a/c_glib/tool/Makefile.am b/cpp/build-support/tsan-suppressions.txt similarity index 95% rename from c_glib/tool/Makefile.am rename to cpp/build-support/tsan-suppressions.txt index 5d7498b957520..ce897c8591188 100644 --- a/c_glib/tool/Makefile.am +++ b/cpp/build-support/tsan-suppressions.txt @@ -15,5 +15,5 @@ # specific language governing permissions and limitations # under the License. -EXTRA_DIST = \ - get-version.py +# Thread leak in CUDA +thread:libcuda.so diff --git a/ruby/red-arrow-gpu/.gitignore b/cpp/build-support/ubsan-suppressions.txt similarity index 95% rename from ruby/red-arrow-gpu/.gitignore rename to cpp/build-support/ubsan-suppressions.txt index 161ac0553533c..13a83393a9124 100644 --- a/ruby/red-arrow-gpu/.gitignore +++ b/cpp/build-support/ubsan-suppressions.txt @@ -14,7 +14,3 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. - -/lib/arrow-gpu/version.rb - -/pkg/ diff --git a/cpp/cmake_modules/BuildUtils.cmake b/cpp/cmake_modules/BuildUtils.cmake index fb646dd6c1a74..1591d864a1a7c 100644 --- a/cpp/cmake_modules/BuildUtils.cmake +++ b/cpp/cmake_modules/BuildUtils.cmake @@ -97,7 +97,9 @@ function(ADD_ARROW_LIB LIB_NAME) SHARED_PRIVATE_LINK_LIBS EXTRA_INCLUDES PRIVATE_INCLUDES - DEPENDENCIES) + DEPENDENCIES + SHARED_INSTALL_INTERFACE_LIBS + STATIC_INSTALL_INTERFACE_LIBS) cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN}) if(ARG_UNPARSED_ARGUMENTS) message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}") @@ -119,9 +121,11 @@ function(ADD_ARROW_LIB LIB_NAME) set(BUILD_STATIC ${ARROW_BUILD_STATIC}) endif() - if(MSVC) + if(MSVC OR (CMAKE_GENERATOR STREQUAL Xcode)) # MSVC needs to compile C++ separately for each library kind (shared and static) # because of dllexport declarations + # The Xcode generator doesn't reliably work with Xcode as target names are not + # guessed correctly. set(LIB_DEPS ${ARG_SOURCES}) set(EXTRA_DEPS ${ARG_DEPENDENCIES}) @@ -180,11 +184,14 @@ function(ADD_ARROW_LIB LIB_NAME) ${ARG_PRIVATE_INCLUDES}) endif() - if(APPLE) + if(APPLE AND NOT DEFINED $ENV{EMSCRIPTEN}) # On OS X, you can avoid linking at library load time and instead # expecting that the symbols have been loaded separately. This happens # with libpython* where there can be conflicts between system Python and # the Python from a thirdparty distribution + # + # When running with the Emscripten Compiler, we need not worry about + # python, and the Emscripten Compiler does not support this option. 
set(ARG_SHARED_LINK_FLAGS "-undefined dynamic_lookup ${ARG_SHARED_LINK_FLAGS}") endif() @@ -199,8 +206,16 @@ function(ADD_ARROW_LIB LIB_NAME) VERSION "${ARROW_FULL_SO_VERSION}" SOVERSION "${ARROW_SO_VERSION}") + if (ARG_SHARED_INSTALL_INTERFACE_LIBS) + set(INTERFACE_LIBS ${ARG_SHARED_INSTALL_INTERFACE_LIBS}) + else() + set(INTERFACE_LIBS ${ARG_SHARED_LINK_LIBS}) + endif() + target_link_libraries(${LIB_NAME}_shared - LINK_PUBLIC ${ARG_SHARED_LINK_LIBS} + LINK_PUBLIC + "$<BUILD_INTERFACE:${ARG_SHARED_LINK_LIBS}>" + "$<INSTALL_INTERFACE:${INTERFACE_LIBS}>" + LINK_PRIVATE ${ARG_SHARED_PRIVATE_LINK_LIBS}) if (ARROW_RPATH_ORIGIN) @@ -226,10 +241,12 @@ endif() install(TARGETS ${LIB_NAME}_shared + ${INSTALL_IS_OPTIONAL} EXPORT ${PROJECT_NAME}-targets RUNTIME DESTINATION ${RUNTIME_INSTALL_DIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) endif() if (BUILD_STATIC) @@ -268,14 +285,24 @@ function(ADD_ARROW_LIB LIB_NAME) LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}" OUTPUT_NAME ${LIB_NAME_STATIC}) + if (ARG_STATIC_INSTALL_INTERFACE_LIBS) + set(INTERFACE_LIBS ${ARG_STATIC_INSTALL_INTERFACE_LIBS}) + else() + set(INTERFACE_LIBS ${ARG_STATIC_LINK_LIBS}) + endif() + target_link_libraries(${LIB_NAME}_static - LINK_PUBLIC ${ARG_STATIC_LINK_LIBS}) + LINK_PUBLIC + "$<BUILD_INTERFACE:${ARG_STATIC_LINK_LIBS}>" + "$<INSTALL_INTERFACE:${INTERFACE_LIBS}>") install(TARGETS ${LIB_NAME}_static + ${INSTALL_IS_OPTIONAL} EXPORT ${PROJECT_NAME}-targets RUNTIME DESTINATION ${RUNTIME_INSTALL_DIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) endif() # Modify variable in calling scope @@ -290,7 +317,7 @@ endfunction() ############################################################ # Add a new micro benchmark, with or without an executable that should be built. # If benchmarks are enabled then they will be run along side unit tests with ctest. -# 'make runbenchmark' and 'make unittest' to build/run only benchmark or unittests, +# 'make benchmark' and 'make unittest' to build/run only benchmark or unittests, # respectively. # # REL_BENCHMARK_NAME is the name of the benchmark app. It may be a single component # (e.g. monotime-benchmark) or contain additional components (e.g. # net/net_util-benchmark). Either way, the last component must be a globally # unique name. # # The benchmark will registered as unit test with ctest with a label # of 'benchmark'. # # Arguments after the test name will be passed to set_tests_properties(). # # \arg PREFIX a string to append to the name of the benchmark executable. For # example, if you have src/arrow/foo/bar-benchmark.cc, then PREFIX "foo" will # create test executable foo-bar-benchmark -function(ADD_ARROW_BENCHMARK REL_BENCHMARK_NAME) +# \arg LABELS the benchmark label or labels to assign the unit tests to. By +# default, benchmarks will go in the "benchmark" group. Custom targets for the +# group names must exist +function(ADD_BENCHMARK REL_BENCHMARK_NAME) set(options) set(one_value_args) - set(multi_value_args EXTRA_LINK_LIBS DEPENDENCIES PREFIX) + set(multi_value_args EXTRA_LINK_LIBS STATIC_LINK_LIBS DEPENDENCIES PREFIX LABELS) cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN}) if(ARG_UNPARSED_ARGUMENTS) message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}") endif() @@ -328,12 +358,18 @@ function(ADD_ARROW_BENCHMARK REL_BENCHMARK_NAME) # This benchmark has a corresponding .cc file, set it up as an executable.
set(BENCHMARK_PATH "${EXECUTABLE_OUTPUT_PATH}/${BENCHMARK_NAME}") add_executable(${BENCHMARK_NAME} "${REL_BENCHMARK_NAME}.cc") - target_link_libraries(${BENCHMARK_NAME} ${ARROW_BENCHMARK_LINK_LIBS}) - add_dependencies(runbenchmark ${BENCHMARK_NAME}) + + if (ARG_STATIC_LINK_LIBS) + # Customize link libraries + target_link_libraries(${BENCHMARK_NAME} PRIVATE ${ARG_STATIC_LINK_LIBS}) + else() + target_link_libraries(${BENCHMARK_NAME} PRIVATE ${ARROW_BENCHMARK_LINK_LIBS}) + endif() + add_dependencies(benchmark ${BENCHMARK_NAME}) set(NO_COLOR "--color_print=false") if (ARG_EXTRA_LINK_LIBS) - target_link_libraries(${BENCHMARK_NAME} ${ARG_EXTRA_LINK_LIBS}) + target_link_libraries(${BENCHMARK_NAME} PRIVATE ${ARG_EXTRA_LINK_LIBS}) endif() else() # No executable, just invoke the benchmark (probably a script) directly. @@ -341,13 +377,27 @@ function(ADD_ARROW_BENCHMARK REL_BENCHMARK_NAME) set(NO_COLOR "") endif() + # Add test as dependency of relevant label targets + add_dependencies(all-benchmarks ${BENCHMARK_NAME}) + foreach (TARGET ${ARG_LABELS}) + add_dependencies(${TARGET} ${BENCHMARK_NAME}) + endforeach() + if (ARG_DEPENDENCIES) add_dependencies(${BENCHMARK_NAME} ${ARG_DEPENDENCIES}) endif() + if (ARG_LABELS) + set(ARG_LABELS "benchmark;${ARG_LABELS}") + else() + set(ARG_LABELS benchmark) + endif() + add_test(${BENCHMARK_NAME} ${BUILD_SUPPORT_DIR}/run-test.sh ${CMAKE_BINARY_DIR} benchmark ${BENCHMARK_PATH} ${NO_COLOR}) - set_tests_properties(${BENCHMARK_NAME} PROPERTIES LABELS "benchmark") + set_property(TEST ${BENCHMARK_NAME} + APPEND PROPERTY + LABELS ${ARG_LABELS}) endfunction() ############################################################ @@ -368,16 +418,17 @@ endfunction() # # Arguments after the test name will be passed to set_tests_properties(). # +# \arg ENABLED if passed, add this unit test even if ARROW_BUILD_TESTS is off # \arg PREFIX a string to append to the name of the test executable. For # example, if you have src/arrow/foo/bar-test.cc, then PREFIX "foo" will create # test executable foo-bar-test # \arg LABELS the unit test label or labels to assign the unit tests # to. By default, unit tests will go in the "unittest" group, but if we have # multiple unit tests in some subgroup, you can assign a test to multiple -# groups using the syntax unittest;GROUP2;GROUP3. Custom targets for the group +# groups use the syntax unittest;GROUP2;GROUP3. 
Custom targets for the group # names must exist -function(ADD_ARROW_TEST REL_TEST_NAME) - set(options NO_VALGRIND) +function(ADD_TEST_CASE REL_TEST_NAME) + set(options NO_VALGRIND ENABLED) set(one_value_args) set(multi_value_args SOURCES STATIC_LINK_LIBS EXTRA_LINK_LIBS EXTRA_INCLUDES EXTRA_DEPENDENCIES LABELS PREFIX) @@ -386,19 +437,7 @@ function(ADD_ARROW_TEST REL_TEST_NAME) message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}") endif() - if (NOT "${ARROW_TEST_INCLUDE_LABELS}" STREQUAL "") - set(_SKIP_TEST TRUE) - foreach (_INCLUDED_LABEL ${ARG_LABELS}) - if ("${ARG_LABELS}" MATCHES "${_INCLUDED_LABEL}") - set(_SKIP_TEST FALSE) - endif() - endforeach() - if (_SKIP_TEST) - return() - endif() - endif() - - if (NO_TESTS) + if (NO_TESTS AND NOT ARG_ENABLED) return() endif() get_filename_component(TEST_NAME ${REL_TEST_NAME} NAME_WE) @@ -407,12 +446,6 @@ function(ADD_ARROW_TEST REL_TEST_NAME) set(TEST_NAME "${ARG_PREFIX}-${TEST_NAME}") endif() - if (ARG_LABELS) - set(ARG_LABELS "${ARG_LABELS}") - else() - set(ARG_LABELS unittest) - endif() - if (ARG_SOURCES) set(SOURCES ${ARG_SOURCES}) else() @@ -424,13 +457,13 @@ function(ADD_ARROW_TEST REL_TEST_NAME) if (ARG_STATIC_LINK_LIBS) # Customize link libraries - target_link_libraries(${TEST_NAME} ${ARG_STATIC_LINK_LIBS}) + target_link_libraries(${TEST_NAME} PRIVATE ${ARG_STATIC_LINK_LIBS}) else() - target_link_libraries(${TEST_NAME} ${ARROW_TEST_LINK_LIBS}) + target_link_libraries(${TEST_NAME} PRIVATE ${ARROW_TEST_LINK_LIBS}) endif() if (ARG_EXTRA_LINK_LIBS) - target_link_libraries(${TEST_NAME} ${ARG_EXTRA_LINK_LIBS}) + target_link_libraries(${TEST_NAME} PRIVATE ${ARG_EXTRA_LINK_LIBS}) endif() if (ARG_EXTRA_INCLUDES) @@ -443,10 +476,6 @@ function(ADD_ARROW_TEST REL_TEST_NAME) add_dependencies(${TEST_NAME} ${ARG_EXTRA_DEPENDENCIES}) endif() - foreach (TEST_LABEL ${ARG_LABELS}) - add_dependencies(${TEST_LABEL} ${TEST_NAME}) - endforeach() - if (ARROW_TEST_MEMCHECK AND NOT ARG_NO_VALGRIND) SET_PROPERTY(TARGET ${TEST_NAME} APPEND_STRING PROPERTY @@ -455,18 +484,89 @@ function(ADD_ARROW_TEST REL_TEST_NAME) bash -c "cd '${CMAKE_SOURCE_DIR}'; \ valgrind --suppressions=valgrind.supp --tool=memcheck --gen-suppressions=all \ --leak-check=full --leak-check-heuristics=stdstring --error-exitcode=1 ${TEST_PATH}") - elseif(MSVC) + elseif(WIN32) add_test(${TEST_NAME} ${TEST_PATH}) else() add_test(${TEST_NAME} ${BUILD_SUPPORT_DIR}/run-test.sh ${CMAKE_BINARY_DIR} test ${TEST_PATH}) endif() + # Add test as dependency of relevant targets + add_dependencies(all-tests ${TEST_NAME}) + foreach (TARGET ${ARG_LABELS}) + add_dependencies(${TARGET} ${TEST_NAME}) + endforeach() + + if (ARG_LABELS) + set(ARG_LABELS "unittest;${ARG_LABELS}") + else() + set(ARG_LABELS unittest) + endif() + set_property(TEST ${TEST_NAME} APPEND PROPERTY LABELS ${ARG_LABELS}) endfunction() +############################################################ +# Examples +############################################################ +# Add a new example, with or without an executable that should be built. +# If examples are enabled then they will be run along side unit tests with ctest. +# 'make runexample' to build/run only examples. +# +# REL_EXAMPLE_NAME is the name of the example app. It may be a single component +# (e.g. monotime-example) or contain additional components (e.g. +# net/net_util-example). Either way, the last component must be a globally +# unique name. + +# The example will registered as unit test with ctest with a label +# of 'example'. 
+# +# Arguments after the test name will be passed to set_tests_properties(). +# +# \arg PREFIX a string to append to the name of the example executable. For +# example, if you have src/arrow/foo/bar-example.cc, then PREFIX "foo" will +# create test executable foo-bar-example +function(ADD_ARROW_EXAMPLE REL_EXAMPLE_NAME) + set(options) + set(one_value_args) + set(multi_value_args EXTRA_LINK_LIBS DEPENDENCIES PREFIX) + cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN}) + if(ARG_UNPARSED_ARGUMENTS) + message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}") + endif() + + if(NO_EXAMPLES) + return() + endif() + get_filename_component(EXAMPLE_NAME ${REL_EXAMPLE_NAME} NAME_WE) + + if(ARG_PREFIX) + set(EXAMPLE_NAME "${ARG_PREFIX}-${EXAMPLE_NAME}") + endif() + + if(EXISTS ${CMAKE_SOURCE_DIR}/examples/arrow/${REL_EXAMPLE_NAME}.cc) + # This example has a corresponding .cc file, set it up as an executable. + set(EXAMPLE_PATH "${EXECUTABLE_OUTPUT_PATH}/${EXAMPLE_NAME}") + add_executable(${EXAMPLE_NAME} "${REL_EXAMPLE_NAME}.cc") + target_link_libraries(${EXAMPLE_NAME} ${ARROW_EXAMPLE_LINK_LIBS}) + add_dependencies(runexample ${EXAMPLE_NAME}) + set(NO_COLOR "--color_print=false") + + if (ARG_EXTRA_LINK_LIBS) + target_link_libraries(${EXAMPLE_NAME} ${ARG_EXTRA_LINK_LIBS}) + endif() + endif() + + if (ARG_DEPENDENCIES) + add_dependencies(${EXAMPLE_NAME} ${ARG_DEPENDENCIES}) + endif() + + add_test(${EXAMPLE_NAME} ${EXAMPLE_PATH}) + set_tests_properties(${EXAMPLE_NAME} PROPERTIES LABELS "example") +endfunction() + ############################################################ # Fuzzing ############################################################ @@ -496,3 +596,36 @@ function(ADD_ARROW_FUZZING REL_FUZZING_NAME) PROPERTIES LINK_FLAGS "-fsanitize=fuzzer") endfunction() + +################################################### + +function(ARROW_INSTALL_ALL_HEADERS PATH) + set(options) + set(one_value_args) + set(multi_value_args PATTERN) + cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN}) + if (NOT ARG_PATTERN) + # The .hpp extension is used by some vendored libraries + set(ARG_PATTERN "*.h" "*.hpp") + endif() + file(GLOB CURRENT_DIRECTORY_HEADERS ${ARG_PATTERN}) + + set(PUBLIC_HEADERS) + foreach(HEADER ${CURRENT_DIRECTORY_HEADERS}) + if (NOT ((HEADER MATCHES "internal"))) + LIST(APPEND PUBLIC_HEADERS ${HEADER}) + endif() + endforeach() + install(FILES + ${PUBLIC_HEADERS} + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/${PATH}") +endfunction() + +function(ARROW_ADD_PKG_CONFIG MODULE) + configure_file(${MODULE}.pc.in + "${CMAKE_CURRENT_BINARY_DIR}/${MODULE}.pc" + @ONLY) + install( + FILES "${CMAKE_CURRENT_BINARY_DIR}/${MODULE}.pc" + DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig/") +endfunction() diff --git a/cpp/cmake_modules/CompilerInfo.cmake b/cpp/cmake_modules/CompilerInfo.cmake index 76f692b06dc13..faa12916b7273 100644 --- a/cpp/cmake_modules/CompilerInfo.cmake +++ b/cpp/cmake_modules/CompilerInfo.cmake @@ -21,14 +21,21 @@ if (NOT MSVC) set(COMPILER_GET_VERSION_SWITCH "-v") endif() -message(INFO "Compiler command: ${CMAKE_CXX_COMPILER}") +set(COMPILER_VERSION_COMMAND "${CMAKE_CXX_COMPILER}" "${COMPILER_GET_VERSION_SWITCH}") + +if (UNIX OR APPLE) + set(COMPILER_VERSION_COMMAND "env" "LANG=C" ${COMPILER_VERSION_COMMAND}) +endif() + +string(REPLACE ";" " " COMPILER_VERSION_COMMAND_STR "${COMPILER_VERSION_COMMAND}") +message(STATUS "Compiler command: ${COMPILER_VERSION_COMMAND_STR}") # Some gcc seem to output their version 
on stdout, most do it on stderr, simply # merge both pipes into a single variable -execute_process(COMMAND "${CMAKE_CXX_COMPILER}" ${COMPILER_GET_VERSION_SWITCH} +execute_process(COMMAND ${COMPILER_VERSION_COMMAND} OUTPUT_VARIABLE COMPILER_VERSION_FULL ERROR_VARIABLE COMPILER_VERSION_FULL) -message(INFO "Compiler version: ${COMPILER_VERSION_FULL}") -message(INFO "Compiler id: ${CMAKE_CXX_COMPILER_ID}") +message(STATUS "Compiler version: ${COMPILER_VERSION_FULL}") +message(STATUS "Compiler id: ${CMAKE_CXX_COMPILER_ID}") string(TOLOWER "${COMPILER_VERSION_FULL}" COMPILER_VERSION_FULL_LOWER) if(MSVC) diff --git a/cpp/cmake_modules/FindArrowCuda.cmake b/cpp/cmake_modules/FindArrowCuda.cmake index 8733b6167380a..bac148fa3b637 100644 --- a/cpp/cmake_modules/FindArrowCuda.cmake +++ b/cpp/cmake_modules/FindArrowCuda.cmake @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -# - Find ARROW CUDA (arrow/gpu/cuda_api.h, libarrow_gpu.a, libarrow_gpu.so) +# - Find ARROW CUDA (arrow/gpu/cuda_api.h, libarrow_cuda.a, libarrow_cuda.so) # # This module requires Arrow from which it uses # ARROW_FOUND @@ -31,10 +31,6 @@ # ARROW_CUDA_SHARED_IMP_LIB, path to libarrow's import library (MSVC only) # ARROW_CUDA_FOUND, whether arrow has been found -# -# TODO(ARROW-3209): rename arrow/gpu to arrow/cuda, arrow_gpu to arrow_cuda -# - include(FindPkgConfig) include(GNUInstallDirs) @@ -63,14 +59,14 @@ if (NOT (ARROW_CUDA_INCLUDE_DIR STREQUAL ARROW_INCLUDE_DIR)) message(WARNING ${ARROW_CUDA_WARN_MSG}) endif() -find_library(ARROW_CUDA_LIB_PATH NAMES arrow_gpu +find_library(ARROW_CUDA_LIB_PATH NAMES arrow_cuda PATHS ${ARROW_SEARCH_LIB_PATH} NO_DEFAULT_PATH) get_filename_component(ARROW_CUDA_LIBS ${ARROW_CUDA_LIB_PATH} DIRECTORY) if (MSVC) - find_library(ARROW_CUDA_SHARED_LIBRARIES NAMES arrow_gpu + find_library(ARROW_CUDA_SHARED_LIBRARIES NAMES arrow_cuda PATHS ${ARROW_HOME} NO_DEFAULT_PATH PATH_SUFFIXES "bin" ) get_filename_component(ARROW_CUDA_SHARED_LIBS ${ARROW_CUDA_SHARED_LIBRARIES} PATH ) @@ -79,7 +75,7 @@ endif() if (ARROW_CUDA_INCLUDE_DIR AND ARROW_CUDA_LIBS) set(ARROW_CUDA_FOUND TRUE) - set(ARROW_CUDA_LIB_NAME arrow_gpu) + set(ARROW_CUDA_LIB_NAME arrow_cuda) if (MSVC) set(ARROW_CUDA_STATIC_LIB ${ARROW_CUDA_LIBS}/${ARROW_CUDA_LIB_NAME}${ARROW_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}) set(ARROW_CUDA_SHARED_LIB ${ARROW_CUDA_SHARED_LIBS}/${ARROW_CUDA_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) diff --git a/cpp/cmake_modules/FindClangTools.cmake b/cpp/cmake_modules/FindClangTools.cmake index 2ddf7880ceb43..55b425fcbcfe4 100644 --- a/cpp/cmake_modules/FindClangTools.cmake +++ b/cpp/cmake_modules/FindClangTools.cmake @@ -17,11 +17,15 @@ # # find_package(ClangTools) # -# Variables used by this module, they can change the default behaviour and need +# Variables used by this module which can change the default behaviour and need # to be set before calling find_package: # -# ClangToolsBin_HOME - -# When set, this path is inspected instead of standard library binary locations +# CLANG_FORMAT_VERSION - +# The version of clang-format to find. If this is not specified, clang-format +# will not be searched for. 
+# +# ClangTools_PATH - +# When set, this path is inspected in addition to standard library binary locations # to find clang-tidy and clang-format # # This module defines @@ -45,6 +49,13 @@ else() endif() endif() +set(CLANG_TOOLS_SEARCH_PATHS + ${ClangTools_PATH} + $ENV{CLANG_TOOLS_PATH} + /usr/local/bin /usr/bin + "C:/Program Files/LLVM/bin" + "${HOMEBREW_PREFIX}/bin") + find_program(CLANG_TIDY_BIN NAMES clang-tidy-4.0 clang-tidy-3.9 @@ -52,33 +63,29 @@ find_program(CLANG_TIDY_BIN clang-tidy-3.7 clang-tidy-3.6 clang-tidy - PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin "${HOMEBREW_PREFIX}/bin" - NO_DEFAULT_PATH + PATHS ${CLANG_TOOLS_SEARCH_PATHS} NO_DEFAULT_PATH ) if ( "${CLANG_TIDY_BIN}" STREQUAL "CLANG_TIDY_BIN-NOTFOUND" ) set(CLANG_TIDY_FOUND 0) - message("clang-tidy not found") + message(STATUS "clang-tidy not found") else() set(CLANG_TIDY_FOUND 1) - message("clang-tidy found at ${CLANG_TIDY_BIN}") + message(STATUS "clang-tidy found at ${CLANG_TIDY_BIN}") endif() if (CLANG_FORMAT_VERSION) find_program(CLANG_FORMAT_BIN NAMES clang-format-${CLANG_FORMAT_VERSION} - PATHS - ${ClangTools_PATH} - $ENV{CLANG_TOOLS_PATH} - /usr/local/bin /usr/bin "${HOMEBREW_PREFIX}/bin" - NO_DEFAULT_PATH + PATHS ${CLANG_TOOLS_SEARCH_PATHS} NO_DEFAULT_PATH ) # If not found yet, search alternative locations - if (("${CLANG_FORMAT_BIN}" STREQUAL "CLANG_FORMAT_BIN-NOTFOUND") AND APPLE) + if ("${CLANG_FORMAT_BIN}" STREQUAL "CLANG_FORMAT_BIN-NOTFOUND") + STRING(REGEX REPLACE "^([0-9]+)\\.[0-9]+" "\\1" CLANG_FORMAT_MAJOR_VERSION "${CLANG_FORMAT_VERSION}") + STRING(REGEX REPLACE "^[0-9]+\\.([0-9]+)" "\\1" CLANG_FORMAT_MINOR_VERSION "${CLANG_FORMAT_VERSION}") + if (APPLE) # Homebrew ships older LLVM versions in /usr/local/opt/llvm@version/ - STRING(REGEX REPLACE "^([0-9]+)\\.[0-9]+" "\\1" CLANG_FORMAT_MAJOR_VERSION "${CLANG_FORMAT_VERSION}") - STRING(REGEX REPLACE "^[0-9]+\\.([0-9]+)" "\\1" CLANG_FORMAT_MINOR_VERSION "${CLANG_FORMAT_VERSION}") if ("${CLANG_FORMAT_MINOR_VERSION}" STREQUAL "0") find_program(CLANG_FORMAT_BIN NAMES clang-format @@ -102,24 +109,29 @@ if (CLANG_FORMAT_VERSION) NO_DEFAULT_PATH ) endif() + else() + # try searching for "clang-format" and check the version + find_program(CLANG_FORMAT_BIN + NAMES clang-format + PATHS ${CLANG_TOOLS_SEARCH_PATHS} NO_DEFAULT_PATH + ) + if (NOT ("${CLANG_FORMAT_BIN}" STREQUAL "CLANG_FORMAT_BIN-NOTFOUND")) + execute_process(COMMAND ${CLANG_FORMAT_BIN} "-version" + OUTPUT_VARIABLE CLANG_FORMAT_FOUND_VERSION_MESSAGE + OUTPUT_STRIP_TRAILING_WHITESPACE) + if (NOT ("${CLANG_FORMAT_FOUND_VERSION_MESSAGE}" MATCHES "^clang-format version ${CLANG_FORMAT_MAJOR_VERSION}\\.${CLANG_FORMAT_MINOR_VERSION}.*")) + set(CLANG_FORMAT_BIN "CLANG_FORMAT_BIN-NOTFOUND") + endif() + endif() + endif() endif() -else() - find_program(CLANG_FORMAT_BIN - NAMES clang-format-4.0 - clang-format-3.9 - clang-format-3.8 - clang-format-3.7 - clang-format-3.6 - clang-format - PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin "${HOMEBREW_PREFIX}/bin" - NO_DEFAULT_PATH - ) + endif() if ( "${CLANG_FORMAT_BIN}" STREQUAL "CLANG_FORMAT_BIN-NOTFOUND" ) set(CLANG_FORMAT_FOUND 0) - message("clang-format not found") + message(STATUS "clang-format not found") else() set(CLANG_FORMAT_FOUND 1) - message("clang-format found at ${CLANG_FORMAT_BIN}") + message(STATUS "clang-format found at ${CLANG_FORMAT_BIN}") endif() diff --git a/cpp/cmake_modules/FindGTest.cmake b/cpp/cmake_modules/FindGTest.cmake index 8a31ae6e06357..6ddb14aa6fb60 100644 --- 
a/cpp/cmake_modules/FindGTest.cmake +++ b/cpp/cmake_modules/FindGTest.cmake @@ -21,15 +21,25 @@ # to be set before calling find_package: # # GTest_HOME - When set, this path is inspected instead of standard library -# locations as the root of the GTest installation. -# The environment variable GTEST_HOME overrides this veriable. +# locations as the root of the GTest/Gmock installation. +# The environment variable GTEST_HOME overrides this variable. # # This module defines # GTEST_INCLUDE_DIR, directory containing headers # GTEST_LIBS, directory containing gtest libraries # GTEST_STATIC_LIB, path to libgtest.a +# GTEST_STATIC_MAIN_LIB, path to libgtest_main.a # GTEST_SHARED_LIB, path to libgtest's shared library +# GTEST_SHARED_MAIN_LIB, path to libgtest_main's shared library # GTEST_FOUND, whether gtest has been found +# +# GMOCK_INCLUDE_DIR, directory containing headers +# GMOCK_LIBS, directory containing gmock libraries +# GMOCK_STATIC_LIB, path to libgmock.a +# GMOCK_STATIC_MAIN_LIB, path to libgmock_main.a +# GMOCK_SHARED_LIB, path to libgmock's shared library +# GMOCK_SHARED_MAIN_LIB, path to libgmock_main's shared library +# GMOCK_FOUND, whether gmock has been found if( NOT "${GTEST_HOME}" STREQUAL "") file( TO_CMAKE_PATH "${GTEST_HOME}" _native_path ) @@ -38,34 +48,107 @@ elseif ( GTest_HOME ) list( APPEND _gtest_roots ${GTest_HOME} ) endif() +set(GTEST_STATIC_LIB_NAME + ${CMAKE_STATIC_LIBRARY_PREFIX}gtest${CMAKE_STATIC_LIBRARY_SUFFIX}) +set(GTEST_MAIN_STATIC_LIB_NAME + ${CMAKE_STATIC_LIBRARY_PREFIX}gtest_main${CMAKE_STATIC_LIBRARY_SUFFIX}) +set(GTEST_SHARED_LIB_NAME + ${CMAKE_SHARED_LIBRARY_PREFIX}gtest${CMAKE_SHARED_LIBRARY_SUFFIX}) +set(GTEST_MAIN_SHARED_LIB_NAME + ${CMAKE_SHARED_LIBRARY_PREFIX}gtest_main${CMAKE_SHARED_LIBRARY_SUFFIX}) +set(GMOCK_STATIC_LIB_NAME + ${CMAKE_STATIC_LIBRARY_PREFIX}gmock${CMAKE_STATIC_LIBRARY_SUFFIX}) +set(GMOCK_MAIN_STATIC_LIB_NAME + ${CMAKE_STATIC_LIBRARY_PREFIX}gmock_main${CMAKE_STATIC_LIBRARY_SUFFIX}) +set(GMOCK_SHARED_LIB_NAME + ${CMAKE_SHARED_LIBRARY_PREFIX}gmock${CMAKE_SHARED_LIBRARY_SUFFIX}) +set(GMOCK_MAIN_SHARED_LIB_NAME + ${CMAKE_SHARED_LIBRARY_PREFIX}gmock_main${CMAKE_SHARED_LIBRARY_SUFFIX}) + + # Try the parameterized roots, if they exist -if ( _gtest_roots ) - find_path( GTEST_INCLUDE_DIR NAMES gtest/gtest.h - PATHS ${_gtest_roots} NO_DEFAULT_PATH - PATH_SUFFIXES "include" ) - find_library( GTEST_LIBRARIES NAMES gtest gtest_main - PATHS ${_gtest_roots} NO_DEFAULT_PATH - PATH_SUFFIXES "lib" ) -else () - find_path( GTEST_INCLUDE_DIR NAMES gtest/gtest.h ) - find_library( GTEST_LIBRARIES NAMES gtest ) -endif () +if(_gtest_roots) + find_path(GTEST_INCLUDE_DIR NAMES gtest/gtest.h + PATHS ${_gtest_roots} NO_DEFAULT_PATH + PATH_SUFFIXES "include") + set(lib_dirs + "lib/${CMAKE_LIBRARY_ARCHITECTURE}" + "lib64" + "lib") + find_library(GTEST_STATIC_LIB NAMES ${GTEST_STATIC_LIB_NAME} + PATHS ${_gtest_roots} NO_DEFAULT_PATH + PATH_SUFFIXES ${lib_dirs}) + find_library(GTEST_MAIN_STATIC_LIB NAMES ${GTEST_MAIN_STATIC_LIB_NAME} + PATHS ${_gtest_roots} NO_DEFAULT_PATH + PATH_SUFFIXES ${lib_dirs}) + find_library(GTEST_SHARED_LIB NAMES ${GTEST_SHARED_LIB_NAME} + PATHS ${_gtest_roots} NO_DEFAULT_PATH + PATH_SUFFIXES ${lib_dirs}) + find_library(GTEST_MAIN_SHARED_LIB NAMES ${GTEST_MAIN_SHARED_LIB_NAME} + PATHS ${_gtest_roots} NO_DEFAULT_PATH + PATH_SUFFIXES ${lib_dirs}) +else() + find_path(GTEST_INCLUDE_DIR NAMES gtest/gtest.h) + find_library(GTEST_STATIC_LIB NAMES ${GTEST_STATIC_LIB_NAME}) + find_library(GTEST_MAIN_STATIC_LIB NAMES 
${GTEST_MAIN_STATIC_LIB_NAME}) + find_library(GTEST_SHARED_LIB NAMES ${GTEST_SHARED_LIB_NAME}) + find_library(GTEST_MAIN_SHARED_LIB NAMES ${GTEST_MAIN_SHARED_LIB_NAME}) +endif() +# gmock +# Try the parameterized roots, if they exist (reuse gtest because the +# libraries should be built together). +if(_gtest_roots) + find_path(GMOCK_INCLUDE_DIR NAMES gmock/gmock.h + PATHS ${_gtest_roots} NO_DEFAULT_PATH + PATH_SUFFIXES "include") + set(lib_dirs + "lib/${CMAKE_LIBRARY_ARCHITECTURE}" + "lib64" + "lib") + find_library(GMOCK_STATIC_LIB NAMES ${GMOCK_STATIC_LIB_NAME} + PATHS ${_gtest_roots} NO_DEFAULT_PATH + PATH_SUFFIXES ${lib_dirs}) + find_library(GMOCK_MAIN_STATIC_LIB NAMES ${GMOCK_MAIN_STATIC_LIB_NAME} + PATHS ${_gtest_roots} NO_DEFAULT_PATH + PATH_SUFFIXES ${lib_dirs}) + find_library(GMOCK_SHARED_LIB NAMES ${GMOCK_SHARED_LIB_NAME} + PATHS ${_gtest_roots} NO_DEFAULT_PATH + PATH_SUFFIXES ${lib_dirs}) + find_library(GMOCK_MAIN_SHARED_LIB NAMES ${GMOCK_MAIN_SHARED_LIB_NAME} + PATHS ${_gtest_roots} NO_DEFAULT_PATH + PATH_SUFFIXES ${lib_dirs}) +else() + find_path(GMOCK_INCLUDE_DIR NAMES gmock/gmock.h) + find_library(GMOCK_STATIC_LIB NAMES ${GMOCK_STATIC_LIB_NAME}) + find_library(GMOCK_MAIN_STATIC_LIB NAMES ${GMOCK_MAIN_STATIC_LIB_NAME}) + find_library(GMOCK_SHARED_LIB NAMES ${GMOCK_SHARED_LIB_NAME}) + find_library(GMOCK_MAIN_SHARED_LIB NAMES ${GMOCK_MAIN_SHARED_LIB_NAME}) +endif() -if (GTEST_INCLUDE_DIR AND GTEST_LIBRARIES) +if(GTEST_INCLUDE_DIR AND + (GTEST_STATIC_LIB AND GTEST_MAIN_STATIC_LIB) OR + (GTEST_SHARED_LIB AND GTEST_MAIN_SHARED_LIB)) set(GTEST_FOUND TRUE) - get_filename_component( GTEST_LIBS ${GTEST_LIBRARIES} PATH ) - set(GTEST_LIB_NAME gtest) - set(GTEST_STATIC_LIB ${GTEST_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${GTEST_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(GTEST_MAIN_STATIC_LIB ${GTEST_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${GTEST_LIB_NAME}_main${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(GTEST_SHARED_LIB ${GTEST_LIBS}/${CMAKE_SHARED_LIBRARY_PREFIX}${GTEST_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) -else () +else() set(GTEST_FOUND FALSE) -endif () +endif() + +if(GMOCK_INCLUDE_DIR AND + (GMOCK_STATIC_LIB AND GMOCK_MAIN_STATIC_LIB) OR + (GMOCK_SHARED_LIB AND GMOCK_MAIN_SHARED_LIB)) + set(GMOCK_FOUND TRUE) +else() + set(GMOCK_FOUND FALSE) +endif() if (GTEST_FOUND) if (NOT GTest_FIND_QUIETLY) - message(STATUS "Found the GTest library: ${GTEST_LIBRARIES}") + message(STATUS "Found the GTest library:") + message(STATUS "GTEST_STATIC_LIB: ${GTEST_STATIC_LIB}") + message(STATUS "GTEST_MAIN_STATIC_LIB: ${GTEST_MAIN_STATIC_LIB}") + message(STATUS "GTEST_SHARED_LIB: ${GTEST_SHARED_LIB}") + message(STATUS "GTEST_MAIN_SHARED_LIB: ${GTEST_MAIN_SHARED_LIB}") endif () else () if (NOT GTest_FIND_QUIETLY) @@ -83,10 +166,45 @@ else () endif () endif () +if (GMOCK_FOUND) + if (NOT GTest_FIND_QUIETLY) + message(STATUS "Found the Gmock library:") + message(STATUS "GMOCK_STATIC_LIB: ${GMOCK_STATIC_LIB}") + message(STATUS "GMOCK_MAIN_STATIC_LIB: ${GMOCK_MAIN_STATIC_LIB}") + message(STATUS "GMOCK_SHARED_LIB: ${GMOCK_SHARED_LIB}") + message(STATUS "GMOCK_MAIN_SHARED_LIB: ${GMOCK_MAIN_SHARED_LIB}") + endif () +else () + # Reuse Gtest quietly and required flags because they should be found + # in tandem. + if (NOT GTest_FIND_QUIETLY) + set(GMOCK_ERR_MSG "Could not find the GMock library. 
Looked in ") + if ( _gtest_roots ) + set(GTEST_ERR_MSG "${GMOCK_ERR_MSG} in ${_gtest_roots}.") + else () + set(GTEST_ERR_MSG "${GMOCK_ERR_MSG} system search paths.") + endif () + if (GTest_FIND_REQUIRED) + message(FATAL_ERROR "${GMOCK_ERR_MSG}") + else (GTest_FIND_REQUIRED) + message(STATUS "${GMOCK_ERR_MSG}") + endif (GTest_FIND_REQUIRED) + endif () +endif () + + mark_as_advanced( GTEST_INCLUDE_DIR GTEST_LIBS - GTEST_LIBRARIES GTEST_STATIC_LIB + GTEST_MAIN_STATIC_LIB GTEST_SHARED_LIB + GTEST_MAIN_SHARED_LIB + + GMOCK_INCLUDE_DIR + GMOCK_LIBS + GMOCK_STATIC_LIB + GMOCK_MAIN_STATIC_LIB + GMOCK_SHARED_LIB + GMOCK_MAIN_SHARED_LIB ) diff --git a/cpp/cmake_modules/FindInferTools.cmake b/cpp/cmake_modules/FindInferTools.cmake index 00c6709c67703..e2d10209b2a0e 100644 --- a/cpp/cmake_modules/FindInferTools.cmake +++ b/cpp/cmake_modules/FindInferTools.cmake @@ -38,8 +38,8 @@ find_program(INFER_BIN if ( "${INFER_BIN}" STREQUAL "INFER_BIN-NOTFOUND" ) set(INFER_FOUND 0) - message("infer not found") + message(STATUS "infer not found") else() set(INFER_FOUND 1) - message("infer found at ${INFER_BIN}") + message(STATUS "infer found at ${INFER_BIN}") endif() diff --git a/cpp/cmake_modules/FindLLVM.cmake b/cpp/cmake_modules/FindLLVM.cmake index eb6afd6f90759..edc1b48888ace 100644 --- a/cpp/cmake_modules/FindLLVM.cmake +++ b/cpp/cmake_modules/FindLLVM.cmake @@ -21,9 +21,24 @@ # set(GANDIVA_LLVM_VERSION 6.0) + +if (APPLE) + # Also look in homebrew for a matching llvm version + find_program(BREW_BIN brew) + if (BREW_BIN) + execute_process( + COMMAND ${BREW_BIN} --prefix "llvm@6" + OUTPUT_VARIABLE LLVM_BREW_PREFIX + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + endif() +endif() + find_package(LLVM ${GANDIVA_LLVM_VERSION} REQUIRED CONFIG HINTS /usr/local/opt/llvm - /usr/share) + /usr/share + ${LLVM_BREW_PREFIX} + ${LLVM_DIR}) message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") diff --git a/cpp/cmake_modules/FindProtobuf.cmake b/cpp/cmake_modules/FindProtobuf.cmake index e4a87f4f9cabe..f53f48d60686e 100644 --- a/cpp/cmake_modules/FindProtobuf.cmake +++ b/cpp/cmake_modules/FindProtobuf.cmake @@ -44,12 +44,12 @@ if (EXISTS "${_protobuf_path}/lib/${CMAKE_LIBRARY_ARCHITECTURE}") set (lib_dirs "lib/${CMAKE_LIBRARY_ARCHITECTURE}" ${lib_dirs}) endif () -find_library (PROTOBUF_LIBRARY NAMES protobuf PATHS +find_library (PROTOBUF_LIBRARY NAMES protobuf libprotobuf PATHS ${_protobuf_path} NO_DEFAULT_PATH PATH_SUFFIXES ${lib_dirs}) -find_library (PROTOC_LIBRARY NAMES protoc PATHS +find_library (PROTOC_LIBRARY NAMES protoc libprotoc PATHS ${_protobuf_path} NO_DEFAULT_PATH PATH_SUFFIXES ${lib_dirs}) @@ -66,7 +66,7 @@ if (PROTOBUF_INCLUDE_DIR AND PROTOBUF_LIBRARY AND PROTOC_LIBRARY AND PROTOBUF_EX get_filename_component (PROTOBUF_LIBS ${PROTOBUF_LIBRARY} PATH) set (PROTOBUF_LIB_NAME protobuf) set (PROTOC_LIB_NAME protoc) - set (PROTOBUF_STATIC_LIB ${PROTOBUF_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${PROTOBUF_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}) + set (PROTOBUF_STATIC_LIB ${PROTOBUF_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${PROTOBUF_LIB_NAME}${PROTOBUF_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}) set (PROTOC_STATIC_LIB ${PROTOBUF_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${PROTOC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}) else () set (PROTOBUF_FOUND FALSE) @@ -81,7 +81,7 @@ if (PROTOBUF_FOUND) message (STATUS "Found the Protoc executable: ${PROTOBUF_EXECUTABLE}") else() if (_protobuf_path) - set (PROTOBUF_ERR_MSG "Could not find Protobuf. 
Looked in ${_protobuf_path}.") + set (PROTOBUF_ERR_MSG "Could not find Protobuf. Looked in ${_protobuf_path}") else () set (PROTOBUF_ERR_MSG "Could not find Protobuf in system search paths.") endif() @@ -100,4 +100,3 @@ mark_as_advanced ( PROTOBUF_STATIC_LIB PROTOC_STATIC_LIB ) - diff --git a/cpp/cmake_modules/FindRE2.cmake b/cpp/cmake_modules/FindRE2.cmake index ae0f182d0e48c..51b093fc97767 100644 --- a/cpp/cmake_modules/FindRE2.cmake +++ b/cpp/cmake_modules/FindRE2.cmake @@ -45,14 +45,18 @@ if (EXISTS "${_re2_path}/lib/${CMAKE_LIBRARY_ARCHITECTURE}") set (lib_dirs "lib/${CMAKE_LIBRARY_ARCHITECTURE}" ${lib_dirs}) endif () -find_library(RE2_STATIC_LIB NAMES libre2${CMAKE_STATIC_LIBRARY_SUFFIX} +set(RE2_LIB_NAME re2) +set(RE2_STATIC_LIB_NAME ${CMAKE_STATIC_LIBRARY_PREFIX}${RE2_LIB_NAME}${RE2_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}) +set(RE2_SHARED_LIB_NAME ${CMAKE_SHARED_LIBRARY_PREFIX}${RE2_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) + +find_library(RE2_STATIC_LIB NAMES ${RE2_STATIC_LIB_NAME} PATHS ${_re2_path} NO_DEFAULT_PATH PATH_SUFFIXES ${lib_dirs} DOC "Google's re2 regex static library" ) -find_library(RE2_SHARED_LIB NAMES libre2${CMAKE_SHARED_LIBRARY_SUFFIX} +find_library(RE2_SHARED_LIB NAMES ${RE2_SHARED_LIB_NAME} PATHS ${_re2_path} NO_DEFAULT_PATH PATH_SUFFIXES ${lib_dirs} diff --git a/cpp/cmake_modules/FindThrift.cmake b/cpp/cmake_modules/FindThrift.cmake index 540276699148d..cb0f819bd57bd 100644 --- a/cpp/cmake_modules/FindThrift.cmake +++ b/cpp/cmake_modules/FindThrift.cmake @@ -34,6 +34,17 @@ if( NOT "${THRIFT_HOME}" STREQUAL "") list( APPEND _thrift_roots ${_native_path} ) elseif ( Thrift_HOME ) list( APPEND _thrift_roots ${Thrift_HOME} ) +elseif (APPLE) + # Also look in homebrew for a matching thrift version + find_program(BREW_BIN brew) + if (BREW_BIN) + execute_process( + COMMAND ${BREW_BIN} --prefix "thrift" + OUTPUT_VARIABLE THRIFT_BREW_PREFIX + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + list( APPEND _thrift_roots ${THRIFT_BREW_PREFIX} ) + endif() endif() message(STATUS "THRIFT_HOME: ${THRIFT_HOME}") diff --git a/cpp/cmake_modules/FindgRPC.cmake b/cpp/cmake_modules/FindgRPC.cmake new file mode 100644 index 0000000000000..edf72864a7282 --- /dev/null +++ b/cpp/cmake_modules/FindgRPC.cmake @@ -0,0 +1,101 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
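# For illustration only: a minimal consumer sketch of this module (not part
# of the patch). It assumes cpp/cmake_modules is on CMAKE_MODULE_PATH, and
# `my_target` plus the install prefix are hypothetical:
#
#   set(GRPC_HOME "/opt/grpc")              # hypothetical install prefix
#   find_package(gRPC REQUIRED)             # dispatches to this module
#   include_directories(SYSTEM ${GRPC_INCLUDE_DIR})
#   target_link_libraries(my_target
#                         ${GRPCPP_STATIC_LIB}
#                         ${GRPC_STATIC_LIB}
#                         ${GPR_STATIC_LIB}
#                         ${GRPC_ADDRESS_SORTING_STATIC_LIB})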
+ +if( NOT "${GRPC_HOME}" STREQUAL "") + file (TO_CMAKE_PATH "${GRPC_HOME}" _grpc_path) +endif() + +message (STATUS "GRPC_HOME: ${GRPC_HOME}") + +find_package(gRPC CONFIG) +if (gRPC_FOUND) + message (STATUS "Found CMake installation of gRPC") + get_property(GRPC_INCLUDE_DIR TARGET gRPC::gpr PROPERTY INTERFACE_INCLUDE_DIRECTORIES) + get_property(GPR_STATIC_LIB TARGET gRPC::gpr PROPERTY LOCATION) + get_property(GRPC_STATIC_LIB TARGET gRPC::grpc_unsecure PROPERTY LOCATION) + get_property(GRPCPP_STATIC_LIB TARGET gRPC::grpc++_unsecure PROPERTY LOCATION) + get_property(GRPC_ADDRESS_SORTING_STATIC_LIB + TARGET gRPC::address_sorting PROPERTY LOCATION) + # Get location of grpc_cpp_plugin so we can pass it to protoc + get_property(GRPC_CPP_PLUGIN TARGET gRPC::grpc_cpp_plugin PROPERTY LOCATION) +else() + find_path (GRPC_INCLUDE_DIR grpc/grpc.h HINTS + ${_grpc_path} + NO_DEFAULT_PATH + PATH_SUFFIXES "include") + + set (lib_dirs "lib") + if (EXISTS "${_grpc_path}/lib64") + set (lib_dirs "lib64" ${lib_dirs}) + endif () + if (EXISTS "${_grpc_path}/lib/${CMAKE_LIBRARY_ARCHITECTURE}") + set (lib_dirs "lib/${CMAKE_LIBRARY_ARCHITECTURE}" ${lib_dirs}) + endif () + + find_library (GPR_STATIC_LIB + NAMES "${CMAKE_STATIC_LIBRARY_PREFIX}gpr${CMAKE_STATIC_LIBRARY_SUFFIX}" + PATHS ${_grpc_path} + NO_DEFAULT_PATH + PATH_SUFFIXES ${lib_dirs}) + + # On Debian/Ubuntu, libaddress_sorting is statically linked. + find_library (GRPC_ADDRESS_SORTING_STATIC_LIB + NAMES "${CMAKE_STATIC_LIBRARY_PREFIX}address_sorting${CMAKE_STATIC_LIBRARY_SUFFIX}" + "${CMAKE_STATIC_LIBRARY_PREFIX}grpc++${CMAKE_STATIC_LIBRARY_SUFFIX}" + PATHS ${_grpc_path} + NO_DEFAULT_PATH + PATH_SUFFIXES ${lib_dirs}) + + find_library (GRPC_STATIC_LIB + NAMES "${CMAKE_STATIC_LIBRARY_PREFIX}grpc${CMAKE_STATIC_LIBRARY_SUFFIX}" + PATHS ${_grpc_path} + NO_DEFAULT_PATH + PATH_SUFFIXES ${lib_dirs}) + + find_library (GRPCPP_STATIC_LIB + NAMES "${CMAKE_STATIC_LIBRARY_PREFIX}grpc++${CMAKE_STATIC_LIBRARY_SUFFIX}" + PATHS ${_grpc_path} + NO_DEFAULT_PATH + PATH_SUFFIXES ${lib_dirs}) + + find_program(GRPC_CPP_PLUGIN grpc_cpp_plugin protoc-gen-grpc-cpp + HINTS ${_grpc_path} + NO_DEFAULT_PATH + PATH_SUFFIXES "bin") +endif() + +if (GRPC_INCLUDE_DIR AND GPR_STATIC_LIB AND GRPC_ADDRESS_SORTING_STATIC_LIB AND + GRPC_STATIC_LIB AND GRPCPP_STATIC_LIB AND GRPC_CPP_PLUGIN) + set (gRPC_FOUND TRUE) +else () + set (gRPC_FOUND FALSE) +endif () + +if (gRPC_FOUND) + message (STATUS "Found the gRPC headers: ${GRPC_INCLUDE_DIR}") +else() + if (_grpc_path) + set (GRPC_ERR_MSG "Could not find gRPC. Looked in ${_grpc_path}.") + else () + set (GRPC_ERR_MSG "Could not find gRPC in system search paths.") + endif() + + if (gRPC_FIND_REQUIRED) + message (FATAL_ERROR "${GRPC_ERR_MSG}") + else () + message (STATUS "${GRPC_ERR_MSG}") + endif () +endif() + +mark_as_advanced ( + GRPC_INCLUDE_DIR + ) diff --git a/cpp/cmake_modules/GandivaBuildUtils.cmake b/cpp/cmake_modules/GandivaBuildUtils.cmake deleted file mode 100644 index 521d6976b5803..0000000000000 --- a/cpp/cmake_modules/GandivaBuildUtils.cmake +++ /dev/null @@ -1,91 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set(GANDIVA_TEST_LINK_LIBS - gtest_static - gtest_main_static - ${RE2_LIBRARY}) - -if (PTHREAD_LIBRARY) - set(GANDIVA_TEST_LINK_LIBS - ${GANDIVA_TEST_LINK_LIBS} - ${PTHREAD_LIBRARY}) -endif() - -# Add a unittest executable, with its dependencies. -function(add_gandiva_unit_test REL_TEST_NAME) - get_filename_component(TEST_NAME ${REL_TEST_NAME} NAME_WE) - - add_executable(${TEST_NAME} ${REL_TEST_NAME} ${ARGN}) - if(${REL_TEST_NAME} MATCHES "llvm" OR - ${REL_TEST_NAME} MATCHES "expression_registry") - # If the unit test has llvm in its name, include llvm. - add_dependencies(${TEST_NAME} LLVM::LLVM_INTERFACE) - target_link_libraries(${TEST_NAME} PRIVATE LLVM::LLVM_INTERFACE) - endif() - - # Require toolchain to be built - add_dependencies(${TEST_NAME} arrow_dependencies) - - target_include_directories(${TEST_NAME} PRIVATE - ${CMAKE_SOURCE_DIR}/include - ${CMAKE_SOURCE_DIR}/src - ) - target_link_libraries(${TEST_NAME} - PRIVATE arrow_shared ${GANDIVA_TEST_LINK_LIBS} - ) - add_test(NAME ${TEST_NAME} COMMAND ${TEST_NAME}) - set_property(TEST ${TEST_NAME} PROPERTY LABELS gandiva,unittest ${TEST_NAME}) -endfunction(add_gandiva_unit_test REL_TEST_NAME) - -# Add a unittest executable for a precompiled file (used to generate IR) -function(add_precompiled_unit_test REL_TEST_NAME) - get_filename_component(TEST_NAME ${REL_TEST_NAME} NAME_WE) - - add_executable(${TEST_NAME} ${REL_TEST_NAME} ${ARGN}) - # Require toolchain to be built - add_dependencies(${TEST_NAME} arrow_dependencies) - target_include_directories(${TEST_NAME} PRIVATE ${CMAKE_SOURCE_DIR}/src) - target_link_libraries(${TEST_NAME} - PRIVATE arrow_shared ${GANDIVA_TEST_LINK_LIBS} - ) - target_compile_definitions(${TEST_NAME} PRIVATE GANDIVA_UNIT_TEST=1) - add_test(NAME ${TEST_NAME} COMMAND ${TEST_NAME}) - set_property(TEST ${TEST_NAME} PROPERTY LABELS gandiva,unittest ${TEST_NAME}) -endfunction(add_precompiled_unit_test REL_TEST_NAME) - -# Add an integ executable, with its dependencies. 
-function(add_gandiva_integ_test REL_TEST_NAME GANDIVA_LIB) - get_filename_component(TEST_NAME ${REL_TEST_NAME} NAME_WE) - - add_executable(${TEST_NAME}_${GANDIVA_LIB} ${REL_TEST_NAME} ${ARGN}) - target_include_directories(${TEST_NAME}_${GANDIVA_LIB} PRIVATE ${CMAKE_SOURCE_DIR}) - target_link_libraries(${TEST_NAME}_${GANDIVA_LIB} PRIVATE - ${GANDIVA_LIB} - ${GANDIVA_TEST_LINK_LIBS} - ) - - add_test(NAME ${TEST_NAME}_${GANDIVA_LIB} COMMAND ${TEST_NAME}_${GANDIVA_LIB}) - set_property(TEST ${TEST_NAME}_${GANDIVA_LIB} PROPERTY LABELS gandiva,integ ${TEST_NAME}_${GANDIVA_LIB}) -endfunction(add_gandiva_integ_test REL_TEST_NAME) - -function(prevent_in_source_builds) - file(TO_CMAKE_PATH "${PROJECT_BINARY_DIR}/CMakeLists.txt" LOC_PATH) - if(EXISTS "${LOC_PATH}") - message(FATAL_ERROR "Gandiva does not support in-source builds") - endif() -endfunction(prevent_in_source_builds) diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 893ec360d3e55..44ca22f5dacb2 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -25,6 +25,9 @@ CHECK_CXX_COMPILER_FLAG("-maltivec" CXX_SUPPORTS_ALTIVEC) # Arm64 compiler flags CHECK_CXX_COMPILER_FLAG("-march=armv8-a+crc" CXX_SUPPORTS_ARMCRC) +# Support C11 +set(CMAKE_C_STANDARD 11) + # This ensures that things like gnu++11 get passed correctly set(CMAKE_CXX_STANDARD 11) @@ -35,6 +38,12 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) # shared libraries set(CMAKE_POSITION_INDEPENDENT_CODE ON) +# if no build type is specified, default to release builds +if (NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release) +endif(NOT CMAKE_BUILD_TYPE) +string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE) + # compiler flags that are common across debug/release builds if (WIN32) # TODO(wesm): Change usages of C runtime functions that MSVC says are @@ -68,10 +77,10 @@ if (WIN32) if (ARROW_USE_STATIC_CRT) foreach (c_flag CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG - CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO - CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_DEBUG - CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) - string(REPLACE "/MD" "-MT" ${c_flag} "${${c_flag}}") + CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO + CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_DEBUG + CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) + string(REPLACE "/MD" "-MT" ${c_flag} "${${c_flag}}") endforeach() endif() @@ -83,16 +92,26 @@ else() set(CXX_COMMON_FLAGS "") endif() -# Build warning level (CHECKIN, EVERYTHING, etc.) +# BUILD_WARNING_LEVEL adds warning/error compiler flags. The possible values are +# - PRODUCTION: `-Werror` is not provided, so warnings do not halt the build. +# - CHECKIN: Implies `-Werror -Wall` and some other warnings. +# - EVERYTHING: Like `CHECKIN`, but with possible extra flags depending on the +# compiler, including `-Wextra`, `-Weverything`, `-pedantic`. +# This is the most aggressive warning level. -# if no build warning level is specified, default to development warning level +# BUILD_WARNING_LEVEL defaults to `CHECKIN`, unless CMAKE_BUILD_TYPE is +# `RELEASE`, in which case it defaults to `PRODUCTION`. The goal of defaulting +# to `CHECKIN` is to avoid friction from long CI turnaround times.
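# For illustration, configure invocations under this defaulting scheme (a
# sketch; the ../cpp source path is an assumption):
#
#   cmake -DCMAKE_BUILD_TYPE=Debug -DBUILD_WARNING_LEVEL=CHECKIN ../cpp
#   cmake -DCMAKE_BUILD_TYPE=Release ../cpp   # BUILD_WARNING_LEVEL falls
#                                             # back to PRODUCTION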
if (NOT BUILD_WARNING_LEVEL) - set(BUILD_WARNING_LEVEL Production) + if ("${CMAKE_BUILD_TYPE}" STREQUAL "RELEASE") + set(BUILD_WARNING_LEVEL PRODUCTION) + else() + set(BUILD_WARNING_LEVEL CHECKIN) + endif() endif(NOT BUILD_WARNING_LEVEL) +string(TOUPPER ${BUILD_WARNING_LEVEL} BUILD_WARNING_LEVEL) -string(TOUPPER ${BUILD_WARNING_LEVEL} UPPERCASE_BUILD_WARNING_LEVEL) - -if ("${UPPERCASE_BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN") +if ("${BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN") # Pre-checkin builds if ("${COMPILER_FAMILY}" STREQUAL "msvc") string(REPLACE "/W3" "" CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS}") @@ -106,7 +125,7 @@ if ("${UPPERCASE_BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN") -Wno-shadow -Wno-switch-enum -Wno-exit-time-destructors \ -Wno-global-constructors -Wno-weak-template-vtables -Wno-undefined-reinterpret-cast \ -Wno-implicit-fallthrough -Wno-unreachable-code-return \ --Wno-float-equal -Wno-missing-prototypes \ +-Wno-float-equal -Wno-missing-prototypes -Wno-documentation-unknown-command \ -Wno-old-style-cast -Wno-covered-switch-default \ -Wno-cast-align -Wno-vla-extension -Wno-shift-sign-overflow \ -Wno-used-but-marked-unused -Wno-missing-variable-declarations \ @@ -141,7 +160,7 @@ if ("${UPPERCASE_BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN") else() message(FATAL_ERROR "Unknown compiler. Version info:\n${COMPILER_VERSION_FULL}") endif() -elseif ("${UPPERCASE_BUILD_WARNING_LEVEL}" STREQUAL "EVERYTHING") +elseif ("${BUILD_WARNING_LEVEL}" STREQUAL "EVERYTHING") # Pedantic builds for fixing warnings if ("${COMPILER_FAMILY}" STREQUAL "msvc") string(REPLACE "/W3" "" CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS}") @@ -337,11 +356,19 @@ endif() # Debug symbols are stripped for reduced binary size. Add # -DARROW_CXXFLAGS="-g" to add them if (NOT MSVC) - set(C_FLAGS_DEBUG "-ggdb -O0") - set(C_FLAGS_FASTDEBUG "-ggdb -O1") + if(ARROW_GGDB_DEBUG) + set(C_FLAGS_DEBUG "-ggdb -O0") + set(C_FLAGS_FASTDEBUG "-ggdb -O1") + set(CXX_FLAGS_DEBUG "-ggdb -O0") + set(CXX_FLAGS_FASTDEBUG "-ggdb -O1") + else() + set(C_FLAGS_DEBUG "-g -O0") + set(C_FLAGS_FASTDEBUG "-g -O1") + set(CXX_FLAGS_DEBUG "-g -O0") + set(CXX_FLAGS_FASTDEBUG "-g -O1") + endif() + set(C_FLAGS_RELEASE "-O3 -DNDEBUG") - set(CXX_FLAGS_DEBUG "-ggdb -O0") - set(CXX_FLAGS_FASTDEBUG "-ggdb -O1") set(CXX_FLAGS_RELEASE "-O3 -DNDEBUG") endif() @@ -350,18 +377,14 @@ set(C_FLAGS_PROFILE_BUILD "${CXX_FLAGS_RELEASE} -fprofile-use") set(CXX_FLAGS_PROFILE_GEN "${CXX_FLAGS_RELEASE} -fprofile-generate") set(CXX_FLAGS_PROFILE_BUILD "${CXX_FLAGS_RELEASE} -fprofile-use") -# if no build build type is specified, default to debug builds -if (NOT CMAKE_BUILD_TYPE) - set(CMAKE_BUILD_TYPE Debug) -endif(NOT CMAKE_BUILD_TYPE) -string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE) # Set compile flags based on the build type. 
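# For reference, the per-build-type flags selected below for non-MSVC
# compilers (a summary of this file's settings, no new behavior):
#
#   DEBUG          ->  -g -O0   (-ggdb -O0 when ARROW_GGDB_DEBUG is set)
#   FASTDEBUG      ->  -g -O1   (-ggdb -O1 when ARROW_GGDB_DEBUG is set)
#   RELEASE        ->  -O3 -DNDEBUG
#   RELWITHDEBINFO ->  no extra flags appended; CMake's defaults apply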
message("Configured for ${CMAKE_BUILD_TYPE} build (set with cmake -DCMAKE_BUILD_TYPE={release,debug,...})") if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${C_FLAGS_DEBUG}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS_DEBUG}") +elseif ("${CMAKE_BUILD_TYPE}" STREQUAL "RELWITHDEBINFO") elseif ("${CMAKE_BUILD_TYPE}" STREQUAL "FASTDEBUG") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${C_FLAGS_FASTDEBUG}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS_FASTDEBUG}") diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 9829a4d3fbd80..ff2252528fdf3 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -15,6 +15,10 @@ # specific language governing permissions and limitations # under the License. +add_custom_target(toolchain) + +set(THIRDPARTY_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/arrow_thirdparty") + # ---------------------------------------------------------------------- # Toolchain linkage options @@ -26,17 +30,31 @@ set(ARROW_RE2_LINKAGE "static" CACHE STRING set(THIRDPARTY_DIR "${arrow_SOURCE_DIR}/thirdparty") + if (NOT "$ENV{ARROW_BUILD_TOOLCHAIN}" STREQUAL "") set(BROTLI_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}") set(BZ2_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}") + set(CARES_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}") set(DOUBLE_CONVERSION_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}") set(FLATBUFFERS_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}") set(GFLAGS_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}") set(GLOG_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}") set(GRPC_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}") - # Using gtest from the toolchain breaks AppVeyor builds + # Using gtest from the toolchain breaks AppVeyor and + # trusty builds if (NOT MSVC) - set(GTEST_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}") + if (APPLE) + set(GTEST_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}") + else() + #linux + execute_process(COMMAND lsb_release -cs + OUTPUT_VARIABLE RELEASE_CODENAME + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + if (NOT RELEASE_CODENAME STREQUAL "trusty") + set(GTEST_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}") + endif() + endif() endif() set(JEMALLOC_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}") set(LZ4_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}") @@ -67,6 +85,10 @@ if (DEFINED ENV{BZ2_HOME}) set(BZ2_HOME "$ENV{BZ2_HOME}") endif() +if (DEFINED ENV{CARES_HOME}) + set(CARES_HOME "$ENV{CARES_HOME}") +endif() + if (DEFINED ENV{DOUBLE_CONVERSION_HOME}) set(DOUBLE_CONVERSION_HOME "$ENV{DOUBLE_CONVERSION_HOME}") endif() @@ -144,6 +166,18 @@ else() set(ARROW_WITH_THRIFT OFF) endif() +if (ARROW_FLIGHT) + set(ARROW_WITH_GRPC ON) +endif() + +if (ARROW_FLIGHT OR ARROW_IPC) + set(ARROW_WITH_RAPIDJSON ON) +endif() + +if (ARROW_ORC OR ARROW_FLIGHT OR ARROW_GANDIVA) + set(ARROW_WITH_PROTOBUF ON) +endif() + # ---------------------------------------------------------------------- # Versions and URLs for toolchain builds, which also can be used to configure # offline builds @@ -184,6 +218,12 @@ else() set(BROTLI_SOURCE_URL "https://github.com/google/brotli/archive/${BROTLI_VERSION}.tar.gz") endif() +if (DEFINED ENV{ARROW_CARES_URL}) + set(CARES_SOURCE_URL "$ENV{ARROW_CARES_URL}") +else() + set(CARES_SOURCE_URL "https://c-ares.haxx.se/download/c-ares-${CARES_VERSION}.tar.gz") +endif() + if (DEFINED ENV{ARROW_DOUBLE_CONVERSION_URL}) set(DOUBLE_CONVERSION_SOURCE_URL "$ENV{ARROW_DOUBLE_CONVERSION_URL}") else() @@ -290,6 +330,31 @@ string(TOUPPER ${CMAKE_BUILD_TYPE} UPPERCASE_BUILD_TYPE) set(EP_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}}") set(EP_C_FLAGS 
"${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${UPPERCASE_BUILD_TYPE}}") +if (NOT MSVC) + # Set -fPIC on all external projects + set(EP_CXX_FLAGS "${EP_CXX_FLAGS} -fPIC") + set(EP_C_FLAGS "${EP_C_FLAGS} -fPIC") +endif() + +# CC/CXX environment variables are captured on the first invocation of the +# builder (e.g make or ninja) instead of when CMake is invoked into to build +# directory. This leads to issues if the variables are exported in a subshell +# and the invocation of make/ninja is in distinct subshell without the same +# environment (CC/CXX). +set(EP_COMMON_CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}) + +# External projects are still able to override the following declarations. +# cmake command line will favor the last defined variable when a duplicate is +# encountered. This requires that `EP_COMMON_CMAKE_ARGS` is always the first +# argument. +set(EP_COMMON_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DCMAKE_C_FLAGS=${EP_C_FLAGS} + -DCMAKE_C_FLAGS_${UPPERCASE_BUILD_TYPE}=${EP_C_FLAGS} + -DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS} + -DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${EP_CXX_FLAGS}) + if (NOT ARROW_VERBOSE_THIRDPARTY_BUILD) set(EP_LOG_OPTIONS LOG_CONFIGURE 1 @@ -302,12 +367,6 @@ else() set(Boost_DEBUG TRUE) endif() -if (NOT MSVC) - # Set -fPIC on all external projects - set(EP_CXX_FLAGS "${EP_CXX_FLAGS} -fPIC") - set(EP_C_FLAGS "${EP_C_FLAGS} -fPIC") -endif() - # Ensure that a default make is set if ("${MAKE}" STREQUAL "") if (NOT MSVC) @@ -327,14 +386,8 @@ endif() # ---------------------------------------------------------------------- # Find pthreads -if (WIN32) - set(PTHREAD_LIBRARY "PTHREAD_LIBRARY-NOTFOUND") -else() - find_library(PTHREAD_LIBRARY pthread) - message(STATUS "Found pthread: ${PTHREAD_LIBRARY}") - add_library(pthreadshared SHARED IMPORTED) - set_target_properties(pthreadshared PROPERTIES IMPORTED_LOCATION ${PTHREAD_LIBRARY}) -endif() +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Threads REQUIRED) # ---------------------------------------------------------------------- # Add Boost dependencies (code adapted from Apache Kudu (incubating)) @@ -344,6 +397,8 @@ if (MSVC AND ARROW_USE_STATIC_CRT) set(Boost_USE_STATIC_RUNTIME ON) endif() set(Boost_ADDITIONAL_VERSIONS + "1.70.0" "1.70" + "1.69.0" "1.69" "1.68.0" "1.68" "1.67.0" "1.67" "1.66.0" "1.66" @@ -367,15 +422,16 @@ if (ARROW_BOOST_VENDORED) set(BOOST_SYSTEM_LIBRARY boost_system_static) set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_static) set(BOOST_REGEX_LIBRARY boost_regex_static) + if (ARROW_BOOST_HEADER_ONLY) set(BOOST_BUILD_PRODUCTS) set(BOOST_CONFIGURE_COMMAND "") set(BOOST_BUILD_COMMAND "") else() set(BOOST_BUILD_PRODUCTS - ${BOOST_SYSTEM_LIBRARY} - ${BOOST_FILESYSTEM_LIBRARY} - ${BOOST_REGEX_LIBRARY}) + ${BOOST_STATIC_SYSTEM_LIBRARY} + ${BOOST_STATIC_FILESYSTEM_LIBRARY} + ${BOOST_STATIC_REGEX_LIBRARY}) set(BOOST_CONFIGURE_COMMAND "./bootstrap.sh" "--prefix=${BOOST_PREFIX}" @@ -401,12 +457,19 @@ if (ARROW_BOOST_VENDORED) ${EP_LOG_OPTIONS}) set(Boost_INCLUDE_DIR "${BOOST_PREFIX}") set(Boost_INCLUDE_DIRS "${BOOST_INCLUDE_DIR}") - add_dependencies(arrow_dependencies boost_ep) + add_dependencies(toolchain boost_ep) else() if (MSVC) # disable autolinking in boost add_definitions(-DBOOST_ALL_NO_LIB) endif() + + if (DEFINED ENV{BOOST_ROOT} OR DEFINED BOOST_ROOT) + # In older versions of CMake (such as 3.2), the system paths for Boost will + # be looked in first even if we set $BOOST_ROOT or pass -DBOOST_ROOT + set(Boost_NO_SYSTEM_PATHS ON) 
+ endif() + if (ARROW_BOOST_USE_SHARED) # Find shared Boost libraries. set(Boost_USE_STATIC_LIBS OFF) @@ -457,7 +520,7 @@ else() endif() endif() -message(STATUS "Boost include dir: " ${Boost_INCLUDE_DIRS}) +message(STATUS "Boost include dir: " ${Boost_INCLUDE_DIR}) message(STATUS "Boost libraries: " ${Boost_LIBRARIES}) if (NOT ARROW_BOOST_HEADER_ONLY) @@ -482,32 +545,29 @@ include_directories(SYSTEM ${Boost_INCLUDE_DIR}) # Google double-conversion if("${DOUBLE_CONVERSION_HOME}" STREQUAL "") - set(DOUBLE_CONVERSION_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/double-conversion_ep/src/double-conversion_ep") - set(DOUBLE_CONVERSION_HOME "${DOUBLE_CONVERSION_PREFIX}") - set(DOUBLE_CONVERSION_INCLUDE_DIR "${DOUBLE_CONVERSION_PREFIX}/include") - set(DOUBLE_CONVERSION_STATIC_LIB "${DOUBLE_CONVERSION_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}double-conversion${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(DOUBLE_CONVERSION_HOME "${THIRDPARTY_PREFIX}") + set(DOUBLE_CONVERSION_INCLUDE_DIR "${THIRDPARTY_PREFIX}/include") + set(DOUBLE_CONVERSION_STATIC_LIB "${THIRDPARTY_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}double-conversion${CMAKE_STATIC_LIBRARY_SUFFIX}") set(DOUBLE_CONVERSION_CMAKE_ARGS - "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}" - "-DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}" - "-DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${EP_CXX_FLAGS}" - "-DCMAKE_INSTALL_PREFIX=${DOUBLE_CONVERSION_PREFIX}") + ${EP_COMMON_CMAKE_ARGS} + "-DCMAKE_INSTALL_PREFIX=${THIRDPARTY_PREFIX}") + ExternalProject_Add(double-conversion_ep ${EP_LOG_OPTIONS} - INSTALL_DIR ${DOUBLE_CONVERSION_PREFIX} + INSTALL_DIR ${THIRDPARTY_PREFIX} URL ${DOUBLE_CONVERSION_SOURCE_URL} CMAKE_ARGS ${DOUBLE_CONVERSION_CMAKE_ARGS} BUILD_BYPRODUCTS "${DOUBLE_CONVERSION_STATIC_LIB}") set(DOUBLE_CONVERSION_VENDORED 1) + add_dependencies(toolchain double-conversion_ep) else() find_package(double-conversion REQUIRED PATHS "${DOUBLE_CONVERSION_HOME}") set(DOUBLE_CONVERSION_VENDORED 0) endif() -if (DOUBLE_CONVERSION_VENDORED) - add_dependencies(arrow_dependencies double-conversion_ep) -else() +if (NOT DOUBLE_CONVERSION_VENDORED) get_property(DOUBLE_CONVERSION_STATIC_LIB TARGET double-conversion::double-conversion PROPERTY LOCATION) get_property(DOUBLE_CONVERSION_INCLUDE_DIR TARGET double-conversion::double-conversion @@ -523,62 +583,22 @@ message(STATUS "double-conversion include dir: ${DOUBLE_CONVERSION_INCLUDE_DIR}" message(STATUS "double-conversion static library: ${DOUBLE_CONVERSION_STATIC_LIB}") # ---------------------------------------------------------------------- -# Google gtest & gflags - -if(ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS) - add_custom_target(unittest ctest -L unittest) +# gflags - if("${GTEST_HOME}" STREQUAL "") - if(APPLE) - set(GTEST_CMAKE_CXX_FLAGS "-fPIC -DGTEST_USE_OWN_TR1_TUPLE=1 -Wno-unused-value -Wno-ignored-attributes") - elseif(NOT MSVC) - set(GTEST_CMAKE_CXX_FLAGS "-fPIC") - endif() - string(TOUPPER ${CMAKE_BUILD_TYPE} UPPERCASE_BUILD_TYPE) - set(GTEST_CMAKE_CXX_FLAGS "${EP_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}} ${GTEST_CMAKE_CXX_FLAGS}") - - set(GTEST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/googletest_ep-prefix/src/googletest_ep") - set(GTEST_INCLUDE_DIR "${GTEST_PREFIX}/include") - set(GTEST_STATIC_LIB - "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gtest${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(GTEST_MAIN_STATIC_LIB - "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gtest_main${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(GTEST_VENDORED 1) - set(GTEST_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - 
-DCMAKE_INSTALL_PREFIX=${GTEST_PREFIX} - -DCMAKE_CXX_FLAGS=${GTEST_CMAKE_CXX_FLAGS}) - if (MSVC AND NOT ARROW_USE_STATIC_CRT) - set(GTEST_CMAKE_ARGS ${GTEST_CMAKE_ARGS} -Dgtest_force_shared_crt=ON) - endif() - - ExternalProject_Add(googletest_ep - URL ${GTEST_SOURCE_URL} - BUILD_BYPRODUCTS ${GTEST_STATIC_LIB} ${GTEST_MAIN_STATIC_LIB} - CMAKE_ARGS ${GTEST_CMAKE_ARGS} - ${EP_LOG_OPTIONS}) - else() - find_package(GTest REQUIRED) - set(GTEST_VENDORED 0) - endif() - - message(STATUS "GTest include dir: ${GTEST_INCLUDE_DIR}") - message(STATUS "GTest static library: ${GTEST_STATIC_LIB}") - include_directories(SYSTEM ${GTEST_INCLUDE_DIR}) - ADD_THIRDPARTY_LIB(gtest - STATIC_LIB ${GTEST_STATIC_LIB}) - ADD_THIRDPARTY_LIB(gtest_main - STATIC_LIB ${GTEST_MAIN_STATIC_LIB}) - - if(GTEST_VENDORED) - add_dependencies(gtest_static googletest_ep) - add_dependencies(gtest_main_static googletest_ep) - endif() +if (ARROW_BUILD_TESTS OR + ARROW_BUILD_BENCHMARKS OR + (ARROW_USE_GLOG AND GLOG_HOME) OR + (ARROW_WITH_GRPC AND NOT GRPC_HOME)) + set(ARROW_NEED_GFLAGS 1) +else() + set(ARROW_NEED_GFLAGS 0) +endif() +if(ARROW_NEED_GFLAGS) # gflags (formerly Googleflags) command line parsing if("${GFLAGS_HOME}" STREQUAL "") set(GFLAGS_CMAKE_CXX_FLAGS ${EP_CXX_FLAGS}) - - set(GFLAGS_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/gflags_ep-prefix/src/gflags_ep") + set(GFLAGS_PREFIX "${THIRDPARTY_PREFIX}") set(GFLAGS_HOME "${GFLAGS_PREFIX}") set(GFLAGS_INCLUDE_DIR "${GFLAGS_PREFIX}/include") if(MSVC) @@ -587,17 +607,14 @@ if(ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS) set(GFLAGS_STATIC_LIB "${GFLAGS_PREFIX}/lib/libgflags.a") endif() set(GFLAGS_VENDORED 1) - set(GFLAGS_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DCMAKE_INSTALL_PREFIX=${GFLAGS_PREFIX} + set(GFLAGS_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} + "-DCMAKE_INSTALL_PREFIX=${GFLAGS_PREFIX}" -DBUILD_SHARED_LIBS=OFF -DBUILD_STATIC_LIBS=ON -DBUILD_PACKAGING=OFF -DBUILD_TESTING=OFF -DBUILD_CONFIG_TESTS=OFF - -DINSTALL_HEADERS=ON - -DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${EP_CXX_FLAGS} - -DCMAKE_C_FLAGS_${UPPERCASE_BUILD_TYPE}=${EP_C_FLAGS} - -DCMAKE_CXX_FLAGS=${GFLAGS_CMAKE_CXX_FLAGS}) + -DINSTALL_HEADERS=ON) ExternalProject_Add(gflags_ep URL ${GFLAGS_SOURCE_URL} @@ -626,27 +643,119 @@ if(ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS) endif() endif() -if(ARROW_BUILD_BENCHMARKS) - add_custom_target(runbenchmark ctest -L benchmark) +# ---------------------------------------------------------------------- +# Google gtest +if(ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS) + if("${GTEST_HOME}" STREQUAL "") + set(GTEST_CMAKE_CXX_FLAGS ${EP_CXX_FLAGS}) + if(APPLE) + set(GTEST_CMAKE_CXX_FLAGS ${GTEST_CMAKE_CXX_FLAGS} + -DGTEST_USE_OWN_TR1_TUPLE=1 + -Wno-unused-value + -Wno-ignored-attributes) + endif() + + set(GTEST_PREFIX "${THIRDPARTY_PREFIX}") + set(GTEST_INCLUDE_DIR "${GTEST_PREFIX}/include") + set(GTEST_STATIC_LIB + "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gtest${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(GTEST_MAIN_STATIC_LIB + "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gtest_main${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(GTEST_VENDORED 1) + set(GTEST_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} + "-DCMAKE_INSTALL_PREFIX=${GTEST_PREFIX}" + -DCMAKE_CXX_FLAGS=${GTEST_CMAKE_CXX_FLAGS}) + set(GMOCK_INCLUDE_DIR "${GTEST_PREFIX}/include") + set(GMOCK_STATIC_LIB + "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gmock${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(GMOCK_MAIN_STATIC_LIB + "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gmock_main${CMAKE_STATIC_LIBRARY_SUFFIX}") + + if (MSVC AND 
NOT ARROW_USE_STATIC_CRT) + set(GTEST_CMAKE_ARGS ${GTEST_CMAKE_ARGS} -Dgtest_force_shared_crt=ON) + endif() + + ExternalProject_Add(googletest_ep + URL ${GTEST_SOURCE_URL} + BUILD_BYPRODUCTS ${GTEST_STATIC_LIB} ${GTEST_MAIN_STATIC_LIB} ${GMOCK_STATIC_LIB} ${GMOCK_MAIN_STATIC_LIB} + CMAKE_ARGS ${GTEST_CMAKE_ARGS} + ${EP_LOG_OPTIONS}) + else() + find_package(GTest REQUIRED) + set(GTEST_VENDORED 0) + endif() + + message(STATUS "GTest include dir: ${GTEST_INCLUDE_DIR}") + message(STATUS "GMock include dir: ${GMOCK_INCLUDE_DIR}") + include_directories(SYSTEM ${GTEST_INCLUDE_DIR}) + # Conflicts in header files seem to either cause a bad boost symbol on + # apple, or cause trusty to use CPP_TOOLCHAIN's header file for gmock + # (the vendored version is 1.8.0 while conda's is 1.8.1) + if (APPLE) + include_directories(SYSTEM ${GMOCK_INCLUDE_DIR}) + else() + include_directories(BEFORE SYSTEM ${GMOCK_INCLUDE_DIR}) + endif() + if(GTEST_STATIC_LIB) + message(STATUS "GTest static library: ${GTEST_STATIC_LIB}") + message(STATUS "GMock static library: ${GMOCK_STATIC_LIB}") + ADD_THIRDPARTY_LIB(gtest + STATIC_LIB ${GTEST_STATIC_LIB}) + ADD_THIRDPARTY_LIB(gtest_main + STATIC_LIB ${GTEST_MAIN_STATIC_LIB}) + ADD_THIRDPARTY_LIB(gmock + STATIC_LIB ${GMOCK_STATIC_LIB}) + ADD_THIRDPARTY_LIB(gmock_main + STATIC_LIB ${GMOCK_MAIN_STATIC_LIB}) + set(GTEST_LIBRARY gtest_static) + set(GTEST_MAIN_LIBRARY gtest_main_static) + set(GMOCK_LIBRARY gmock_static) + set(GMOCK_MAIN_LIBRARY gmock_main_static) + else() + message(STATUS "GTest shared library: ${GTEST_SHARED_LIB}") + message(STATUS "GMock shared library: ${GMOCK_SHARED_LIB}") + ADD_THIRDPARTY_LIB(gtest + SHARED_LIB ${GTEST_SHARED_LIB}) + ADD_THIRDPARTY_LIB(gtest_main + SHARED_LIB ${GTEST_MAIN_SHARED_LIB}) + set(GTEST_LIBRARY gtest_shared) + set(GTEST_MAIN_LIBRARY gtest_main_shared) + set(GMOCK_LIBRARY gmock_shared) + set(GMOCK_MAIN_LIBRARY gmock_main_shared) + endif() + + if(GTEST_VENDORED) + add_dependencies(${GTEST_LIBRARY} googletest_ep) + add_dependencies(${GTEST_MAIN_LIBRARY} googletest_ep) + add_dependencies(${GMOCK_LIBRARY} googletest_ep) + add_dependencies(${GMOCK_MAIN_LIBRARY} googletest_ep) + endif() +endif() + +if(ARROW_BUILD_BENCHMARKS) if("$ENV{GBENCHMARK_HOME}" STREQUAL "") + if(CMAKE_VERSION VERSION_LESS 3.6) + message(FATAL_ERROR "Building gbenchmark from source requires at least CMake 3.6") + endif() + if(NOT MSVC) - set(GBENCHMARK_CMAKE_CXX_FLAGS "-fPIC -std=c++11 ${EP_CXX_FLAGS}") + set(GBENCHMARK_CMAKE_CXX_FLAGS "${EP_CXX_FLAGS} -std=c++11") endif() if(APPLE) set(GBENCHMARK_CMAKE_CXX_FLAGS "${GBENCHMARK_CMAKE_CXX_FLAGS} -stdlib=libc++") endif() - set(GBENCHMARK_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/gbenchmark_ep/src/gbenchmark_ep-install") + set(GBENCHMARK_PREFIX "${THIRDPARTY_PREFIX}") set(GBENCHMARK_INCLUDE_DIR "${GBENCHMARK_PREFIX}/include") set(GBENCHMARK_STATIC_LIB "${GBENCHMARK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}benchmark${CMAKE_STATIC_LIBRARY_SUFFIX}") set(GBENCHMARK_VENDORED 1) - set(GBENCHMARK_CMAKE_ARGS - "-DCMAKE_BUILD_TYPE=Release" - "-DCMAKE_INSTALL_PREFIX:PATH=${GBENCHMARK_PREFIX}" - "-DBENCHMARK_ENABLE_TESTING=OFF" - "-DCMAKE_CXX_FLAGS=${GBENCHMARK_CMAKE_CXX_FLAGS}") + set(GBENCHMARK_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} + "-DCMAKE_INSTALL_PREFIX=${GBENCHMARK_PREFIX}" + -DBENCHMARK_ENABLE_TESTING=OFF + -DCMAKE_CXX_FLAGS=${GBENCHMARK_CMAKE_CXX_FLAGS}) if (APPLE) set(GBENCHMARK_CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS} "-DBENCHMARK_USE_LIBCXX=ON") endif() @@ -664,30 +773,34 @@ if(ARROW_BUILD_BENCHMARKS) message(STATUS "GBenchmark
include dir: ${GBENCHMARK_INCLUDE_DIR}") message(STATUS "GBenchmark static library: ${GBENCHMARK_STATIC_LIB}") include_directories(SYSTEM ${GBENCHMARK_INCLUDE_DIR}) - ADD_THIRDPARTY_LIB(benchmark + ADD_THIRDPARTY_LIB(gbenchmark STATIC_LIB ${GBENCHMARK_STATIC_LIB}) if(GBENCHMARK_VENDORED) - add_dependencies(benchmark_static gbenchmark_ep) + add_dependencies(gbenchmark_static gbenchmark_ep) endif() endif() -if (ARROW_IPC) +if (ARROW_WITH_RAPIDJSON) # RapidJSON, header only dependency if("${RAPIDJSON_HOME}" STREQUAL "") + set(RAPIDJSON_HOME "${THIRDPARTY_PREFIX}") + set(RAPIDJSON_CMAKE_ARGS + -DRAPIDJSON_BUILD_DOC=OFF + -DRAPIDJSON_BUILD_EXAMPLES=OFF + -DRAPIDJSON_BUILD_TESTS=OFF + "-DCMAKE_INSTALL_PREFIX=${THIRDPARTY_PREFIX}") + ExternalProject_Add(rapidjson_ep + ${EP_LOG_OPTIONS} PREFIX "${CMAKE_BINARY_DIR}" URL ${RAPIDJSON_SOURCE_URL} URL_MD5 ${RAPIDJSON_SOURCE_MD5} - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - BUILD_IN_SOURCE 1 - ${EP_LOG_OPTIONS} - INSTALL_COMMAND "") + CMAKE_ARGS ${RAPIDJSON_CMAKE_ARGS}) - ExternalProject_Get_Property(rapidjson_ep SOURCE_DIR) - set(RAPIDJSON_INCLUDE_DIR "${SOURCE_DIR}/include") + set(RAPIDJSON_INCLUDE_DIR "${RAPIDJSON_HOME}/include") set(RAPIDJSON_VENDORED 1) + add_dependencies(toolchain rapidjson_ep) else() set(RAPIDJSON_INCLUDE_DIR "${RAPIDJSON_HOME}/include") set(RAPIDJSON_VENDORED 0) @@ -695,13 +808,9 @@ if (ARROW_IPC) message(STATUS "RapidJSON include dir: ${RAPIDJSON_INCLUDE_DIR}") include_directories(SYSTEM ${RAPIDJSON_INCLUDE_DIR}) - if(RAPIDJSON_VENDORED) - add_dependencies(arrow_dependencies rapidjson_ep) - endif() - ## Flatbuffers if("${FLATBUFFERS_HOME}" STREQUAL "") - set(FLATBUFFERS_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/flatbuffers_ep-prefix/src/flatbuffers_ep-install") + set(FLATBUFFERS_PREFIX "${THIRDPARTY_PREFIX}") if (MSVC) set(FLATBUFFERS_CMAKE_CXX_FLAGS /EHsc) else() @@ -722,15 +831,12 @@ if (ARROW_IPC) set(FLATBUFFERS_INCLUDE_DIR "${FLATBUFFERS_PREFIX}/include") set(FLATBUFFERS_COMPILER "${FLATBUFFERS_PREFIX}/bin/flatc") set(FLATBUFFERS_VENDORED 1) + add_dependencies(toolchain flatbuffers_ep) else() find_package(Flatbuffers REQUIRED) set(FLATBUFFERS_VENDORED 0) endif() - if(FLATBUFFERS_VENDORED) - add_dependencies(arrow_dependencies flatbuffers_ep) - endif() - message(STATUS "Flatbuffers include dir: ${FLATBUFFERS_INCLUDE_DIR}") message(STATUS "Flatbuffers compiler: ${FLATBUFFERS_COMPILER}") include_directories(SYSTEM ${FLATBUFFERS_INCLUDE_DIR}) @@ -750,7 +856,7 @@ if (ARROW_JEMALLOC) # find_package(jemalloc) set(ARROW_JEMALLOC_USE_SHARED OFF) - set(JEMALLOC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/jemalloc_ep-prefix/src/jemalloc_ep/dist/") + set(JEMALLOC_PREFIX "${THIRDPARTY_PREFIX}") set(JEMALLOC_HOME "${JEMALLOC_PREFIX}") set(JEMALLOC_INCLUDE_DIR "${JEMALLOC_PREFIX}/include") set(JEMALLOC_SHARED_LIB "${JEMALLOC_PREFIX}/lib/libjemalloc${CMAKE_SHARED_LIBRARY_SUFFIX}") @@ -760,7 +866,7 @@ if (ARROW_JEMALLOC) ExternalProject_Add(jemalloc_ep URL ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/jemalloc/${JEMALLOC_VERSION}.tar.gz PATCH_COMMAND touch doc/jemalloc.3 doc/jemalloc.html - CONFIGURE_COMMAND ./autogen.sh "--prefix=${JEMALLOC_PREFIX}" "--with-jemalloc-prefix=je_arrow_" "--with-private-namespace=je_arrow_private_" "--disable-tls" + CONFIGURE_COMMAND ./autogen.sh "AR=${CMAKE_AR}" "CC=${CMAKE_C_COMPILER}" "--prefix=${JEMALLOC_PREFIX}" "--with-jemalloc-prefix=je_arrow_" "--with-private-namespace=je_arrow_private_" "--disable-tls" ${EP_LOG_OPTIONS} BUILD_IN_SOURCE 1 BUILD_COMMAND ${MAKE} ${MAKE_BUILD_ARGS} @@ -769,11 +875,12 @@ if (ARROW_JEMALLOC) # 
Don't use the include directory directly so that we can point to a path # that is unique to our codebase. - include_directories(SYSTEM "${CMAKE_CURRENT_BINARY_DIR}/jemalloc_ep-prefix/src/") + include_directories(SYSTEM "${CMAKE_CURRENT_BINARY_DIR}") + ADD_THIRDPARTY_LIB(jemalloc STATIC_LIB ${JEMALLOC_STATIC_LIB} SHARED_LIB ${JEMALLOC_SHARED_LIB} - DEPS ${PTHREAD_LIBRARY}) + DEPS Threads::Threads) add_dependencies(jemalloc_static jemalloc_ep) endif() @@ -830,7 +937,7 @@ if (ARROW_WITH_ZLIB) ADD_THIRDPARTY_LIB(zlib SHARED_LIB ${ZLIB_SHARED_LIB}) set(ZLIB_LIBRARY zlib_shared) else() - set(ZLIB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep/src/zlib_ep-install") + set(ZLIB_PREFIX "${THIRDPARTY_PREFIX}") set(ZLIB_HOME "${ZLIB_PREFIX}") set(ZLIB_INCLUDE_DIR "${ZLIB_PREFIX}/include") if (MSVC) @@ -843,12 +950,9 @@ if (ARROW_WITH_ZLIB) set(ZLIB_STATIC_LIB_NAME libz.a) endif() set(ZLIB_STATIC_LIB "${ZLIB_PREFIX}/lib/${ZLIB_STATIC_LIB_NAME}") - set(ZLIB_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DCMAKE_INSTALL_PREFIX=${ZLIB_PREFIX} - -DCMAKE_C_FLAGS=${EP_C_FLAGS} - -DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${EP_CXX_FLAGS} - -DCMAKE_C_FLAGS_${UPPERCASE_BUILD_TYPE}=${EP_C_FLAGS} - -DBUILD_SHARED_LIBS=OFF) + set(ZLIB_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} + "-DCMAKE_INSTALL_PREFIX=${ZLIB_PREFIX}" + -DBUILD_SHARED_LIBS=OFF) ADD_THIRDPARTY_LIB(zlib STATIC_LIB ${ZLIB_STATIC_LIB}) set(ZLIB_LIBRARY zlib_static) @@ -869,7 +973,7 @@ if (ARROW_WITH_SNAPPY) # Snappy if("${SNAPPY_HOME}" STREQUAL "") - set(SNAPPY_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/snappy_ep/src/snappy_ep-install") + set(SNAPPY_PREFIX "${THIRDPARTY_PREFIX}") set(SNAPPY_HOME "${SNAPPY_PREFIX}") set(SNAPPY_INCLUDE_DIR "${SNAPPY_PREFIX}/include") if (MSVC) @@ -888,12 +992,10 @@ if (ARROW_WITH_SNAPPY) endif() if (WIN32) - set(SNAPPY_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - "-DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}" - "-DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${EP_CXX_FLAGS}" - "-DCMAKE_C_FLAGS_${UPPERCASE_BUILD_TYPE}=${EP_C_FLAGS}" - "-DCMAKE_C_FLAGS=${EP_C_FLAGS}" - "-DCMAKE_INSTALL_PREFIX=${SNAPPY_PREFIX}") + set(SNAPPY_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} + -DCMAKE_AR=${CMAKE_AR} + -DCMAKE_RANLIB=${CMAKE_RANLIB} + "-DCMAKE_INSTALL_PREFIX=${SNAPPY_PREFIX}") set(SNAPPY_UPDATE_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/cmake_modules/SnappyCMakeLists.txt ./CMakeLists.txt && @@ -911,7 +1013,7 @@ if (ARROW_WITH_SNAPPY) BUILD_BYPRODUCTS "${SNAPPY_STATIC_LIB}") else() ExternalProject_Add(snappy_ep - CONFIGURE_COMMAND ./configure --with-pic "--prefix=${SNAPPY_PREFIX}" ${SNAPPY_CXXFLAGS} + CONFIGURE_COMMAND ./configure --with-pic "AR=${CMAKE_AR}" "RANLIB=${CMAKE_RANLIB}" "--prefix=${SNAPPY_PREFIX}" ${SNAPPY_CXXFLAGS} ${EP_LOG_OPTIONS} BUILD_IN_SOURCE 1 BUILD_COMMAND ${MAKE} @@ -939,7 +1041,7 @@ if (ARROW_WITH_BROTLI) # Brotli if("${BROTLI_HOME}" STREQUAL "") - set(BROTLI_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/brotli_ep/src/brotli_ep-install") + set(BROTLI_PREFIX "${THIRDPARTY_PREFIX}") set(BROTLI_HOME "${BROTLI_PREFIX}") set(BROTLI_INCLUDE_DIR "${BROTLI_PREFIX}/include") if (MSVC) @@ -947,17 +1049,13 @@ if (ARROW_WITH_BROTLI) else() set(BROTLI_LIB_DIR lib) endif() - set(BROTLI_STATIC_LIBRARY_ENC "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_LIBRARY_ARCHITECTURE}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlienc${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(BROTLI_STATIC_LIBRARY_DEC "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_LIBRARY_ARCHITECTURE}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlidec${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(BROTLI_STATIC_LIBRARY_COMMON 
"${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_LIBRARY_ARCHITECTURE}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlicommon${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(BROTLI_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - "-DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}" - "-DCMAKE_C_FLAGS=${EP_C_FLAGS}" - "-DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${EP_CXX_FLAGS}" - "-DCMAKE_C_FLAGS_${UPPERCASE_BUILD_TYPE}=${EP_C_FLAGS}" - -DCMAKE_INSTALL_PREFIX=${BROTLI_PREFIX} - -DCMAKE_INSTALL_LIBDIR=lib/${CMAKE_LIBRARY_ARCHITECTURE} - -DBUILD_SHARED_LIBS=OFF) + set(BROTLI_STATIC_LIBRARY_ENC "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlienc${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(BROTLI_STATIC_LIBRARY_DEC "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlidec${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(BROTLI_STATIC_LIBRARY_COMMON "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlicommon${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(BROTLI_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} + "-DCMAKE_INSTALL_PREFIX=${BROTLI_PREFIX}" + -DCMAKE_INSTALL_LIBDIR=lib + -DBUILD_SHARED_LIBS=OFF) ExternalProject_Add(brotli_ep URL ${BROTLI_SOURCE_URL} @@ -970,7 +1068,7 @@ if (ARROW_WITH_BROTLI) ExternalProject_Get_Property(brotli_ep SOURCE_DIR) ExternalProject_Add_Step(brotli_ep headers_copy - COMMAND xcopy /E /I include ..\\..\\..\\brotli_ep\\src\\brotli_ep-install\\include /Y + COMMAND xcopy /E /I include ..\\..\\..\\arrow_thirdparty\\include /Y DEPENDEES build WORKING_DIRECTORY ${SOURCE_DIR}) endif() @@ -1033,7 +1131,7 @@ if (ARROW_WITH_LZ4) set(LZ4_PATCH_COMMAND PATCH_COMMAND git --git-dir=. apply --verbose --whitespace=fix ${CMAKE_SOURCE_DIR}/build-support/lz4_msbuild_gl_runtimelibrary_params.patch) else() set(LZ4_STATIC_LIB "${LZ4_BUILD_DIR}/lib/liblz4.a") - set(LZ4_BUILD_COMMAND BUILD_COMMAND ${CMAKE_SOURCE_DIR}/build-support/build-lz4-lib.sh) + set(LZ4_BUILD_COMMAND BUILD_COMMAND ${CMAKE_SOURCE_DIR}/build-support/build-lz4-lib.sh "AR=${CMAKE_AR}") endif() ExternalProject_Add(lz4_ep @@ -1068,17 +1166,16 @@ if (ARROW_WITH_ZSTD) # ZSTD if("${ZSTD_HOME}" STREQUAL "") - set(ZSTD_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zstd_ep-install") + set(ZSTD_PREFIX "${THIRDPARTY_PREFIX}") set(ZSTD_INCLUDE_DIR "${ZSTD_PREFIX}/include") - set(ZSTD_CMAKE_ARGS - "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}" - "-DCMAKE_INSTALL_PREFIX=${ZSTD_PREFIX}" - "-DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR}" - "-DZSTD_BUILD_PROGRAMS=off" - "-DZSTD_BUILD_SHARED=off" - "-DZSTD_BUILD_STATIC=on" - "-DZSTD_MULTITHREAD_SUPPORT=off") + set(ZSTD_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + "-DCMAKE_INSTALL_PREFIX=${ZSTD_PREFIX}" + -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR} + -DZSTD_BUILD_PROGRAMS=off + -DZSTD_BUILD_SHARED=off + -DZSTD_BUILD_STATIC=on + -DZSTD_MULTITHREAD_SUPPORT=off) if (MSVC) set(ZSTD_STATIC_LIB "${ZSTD_PREFIX}/${CMAKE_INSTALL_LIBDIR}/zstd_static.lib") @@ -1090,8 +1187,15 @@ if (ARROW_WITH_ZSTD) # Only pass our C flags on Unix as on MSVC it leads to a # "incompatible command-line options" error set(ZSTD_CMAKE_ARGS ${ZSTD_CMAKE_ARGS} - "-DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}" - "-DCMAKE_C_FLAGS=${EP_C_FLAGS}") + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_FLAGS=${EP_C_FLAGS} + -DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}) + endif() + + if(CMAKE_VERSION VERSION_LESS 3.7) + message(FATAL_ERROR "Building zstd using ExternalProject requires \ +at least CMake 3.7") endif() ExternalProject_Add(zstd_ep @@ -1122,16 +1226,14 @@ endif() if (ARROW_GANDIVA) # re2 if ("${RE2_HOME}" STREQUAL "") 
- set (RE2_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/re2_ep-install") + set (RE2_PREFIX "${THIRDPARTY_PREFIX}") set (RE2_HOME "${RE2_PREFIX}") set (RE2_INCLUDE_DIR "${RE2_PREFIX}/include") set (RE2_STATIC_LIB "${RE2_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}re2${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(RE2_CMAKE_ARGS - "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}" - "-DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}" - "-DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${EP_CXX_FLAGS}" - "-DCMAKE_INSTALL_PREFIX=${RE2_PREFIX}") + set(RE2_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} + "-DCMAKE_INSTALL_PREFIX=${RE2_PREFIX}") + ExternalProject_Add(re2_ep ${EP_LOG_OPTIONS} INSTALL_DIR ${RE2_PREFIX} @@ -1139,6 +1241,7 @@ if (ARROW_GANDIVA) CMAKE_ARGS ${RE2_CMAKE_ARGS} BUILD_BYPRODUCTS "${RE2_STATIC_LIB}") set (RE2_VENDORED 1) + add_dependencies(toolchain re2_ep) else () find_package (RE2 REQUIRED) set (RE2_VENDORED 0) @@ -1155,27 +1258,31 @@ if (ARROW_GANDIVA) STATIC_LIB ${RE2_STATIC_LIB}) set(RE2_LIBRARY re2_static) endif() - - if (RE2_VENDORED) - add_dependencies (arrow_dependencies re2_ep) - endif () endif () # ---------------------------------------------------------------------- # Protocol Buffers (required for ORC and Flight and Gandiva libraries) -if (ARROW_ORC OR ARROW_FLIGHT OR ARROW_GANDIVA) +if (ARROW_WITH_PROTOBUF) # protobuf if ("${PROTOBUF_HOME}" STREQUAL "") - set (PROTOBUF_PREFIX "${THIRDPARTY_DIR}/protobuf_ep-install") + set (PROTOBUF_PREFIX "${THIRDPARTY_PREFIX}") set (PROTOBUF_HOME "${PROTOBUF_PREFIX}") set (PROTOBUF_INCLUDE_DIR "${PROTOBUF_PREFIX}/include") set (PROTOBUF_STATIC_LIB "${PROTOBUF_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}protobuf${CMAKE_STATIC_LIBRARY_SUFFIX}") set (PROTOBUF_EXECUTABLE "${PROTOBUF_PREFIX}/bin/protoc") + set (PROTOBUF_CONFIGURE_ARGS "AR=${CMAKE_AR}" + "RANLIB=${CMAKE_RANLIB}" + "CC=${CMAKE_C_COMPILER}" + "CXX=${CMAKE_CXX_COMPILER}" + "--disable-shared" + "--prefix=${PROTOBUF_PREFIX}" + "CFLAGS=${EP_C_FLAGS}" + "CXXFLAGS=${EP_CXX_FLAGS}") ExternalProject_Add(protobuf_ep - CONFIGURE_COMMAND "./configure" "--disable-shared" "--prefix=${PROTOBUF_PREFIX}" "CXXFLAGS=${EP_CXX_FLAGS}" + CONFIGURE_COMMAND "./configure" ${PROTOBUF_CONFIGURE_ARGS} BUILD_IN_SOURCE 1 URL ${PROTOBUF_SOURCE_URL} BUILD_BYPRODUCTS "${PROTOBUF_STATIC_LIB}" "${PROTOBUF_EXECUTABLE}" @@ -1205,55 +1312,153 @@ endif() # ---------------------------------------------------------------------- # Dependencies for Arrow Flight RPC -if (ARROW_FLIGHT) +if (ARROW_WITH_GRPC) + if ("${CARES_HOME}" STREQUAL "") + set(CARES_VENDORED 1) + set(CARES_PREFIX "${THIRDPARTY_PREFIX}") + set(CARES_HOME "${CARES_PREFIX}") + set(CARES_INCLUDE_DIR "${CARES_PREFIX}/include") + + # If you set -DCARES_SHARED=ON then the build system names the library + # libcares_static.a + set(CARES_STATIC_LIB "${CARES_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}cares${CMAKE_STATIC_LIBRARY_SUFFIX}") + + set(CARES_CMAKE_ARGS + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DCARES_STATIC=ON + -DCARES_SHARED=OFF + "-DCMAKE_C_FLAGS=${EP_C_FLAGS}" + "-DCMAKE_INSTALL_PREFIX=${CARES_PREFIX}") + + ExternalProject_Add(cares_ep + ${EP_LOG_OPTIONS} + URL ${CARES_SOURCE_URL} + CMAKE_ARGS ${CARES_CMAKE_ARGS} + BUILD_BYPRODUCTS "${CARES_STATIC_LIB}") + else() + set(CARES_VENDORED 0) + find_package(c-ares REQUIRED + PATHS ${CARES_HOME} + NO_DEFAULT_PATH) + if(TARGET c-ares::cares) + get_property(CARES_STATIC_LIB TARGET c-ares::cares_static PROPERTY LOCATION) + endif() + endif() + message(STATUS "c-ares library: ${CARES_STATIC_LIB}") + + add_custom_target(grpc) + if ("${GRPC_HOME}" STREQUAL "") 
set(GRPC_VENDORED 1) set(GRPC_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/grpc_ep-prefix/src/grpc_ep-build") - set(GRPC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/grpc_ep/src/grpc_ep-install") + set(GRPC_PREFIX "${THIRDPARTY_PREFIX}") set(GRPC_HOME "${GRPC_PREFIX}") set(GRPC_INCLUDE_DIR "${GRPC_PREFIX}/include") - set(GRPC_STATIC_LIBRARY_GPR "${GRPC_BUILD_DIR}/${CMAKE_CFG_INTDIR}/${CMAKE_STATIC_LIBRARY_PREFIX}gpr${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(GRPC_STATIC_LIBRARY_GRPC "${GRPC_BUILD_DIR}/${CMAKE_CFG_INTDIR}/${CMAKE_STATIC_LIBRARY_PREFIX}grpc${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(GRPC_STATIC_LIBRARY_GRPCPP "${GRPC_BUILD_DIR}/${CMAKE_CFG_INTDIR}/${CMAKE_STATIC_LIBRARY_PREFIX}grpcpp${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(GRPC_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - "-DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}" - "-DCMAKE_C_FLAGS=${EP_C_FLAGS}" - -DCMAKE_INSTALL_PREFIX=${GRPC_PREFIX} - -DBUILD_SHARED_LIBS=OFF) + set(GRPC_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} + "-DCMAKE_INSTALL_PREFIX=${GRPC_PREFIX}" + -DBUILD_SHARED_LIBS=OFF) + + set(GRPC_STATIC_LIBRARY_GPR "${GRPC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gpr${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(GRPC_STATIC_LIBRARY_GRPC "${GRPC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}grpc${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(GRPC_STATIC_LIBRARY_GRPCPP "${GRPC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}grpc++${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(GRPC_STATIC_LIBRARY_ADDRESS_SORTING "${GRPC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}address_sorting${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(GRPC_CPP_PLUGIN "${GRPC_PREFIX}/bin/grpc_cpp_plugin") + + set(GRPC_CMAKE_PREFIX "${THIRDPARTY_PREFIX}") + + add_custom_target(grpc_dependencies) + + if (CARES_VENDORED) + add_dependencies(grpc_dependencies cares_ep) + else() + set(GRPC_CMAKE_PREFIX "${GRPC_CMAKE_PREFIX};${CARES_HOME}") + endif() + + if (GFLAGS_VENDORED) + add_dependencies(grpc_dependencies gflags_ep) + else() + set(GRPC_CMAKE_PREFIX "${GRPC_CMAKE_PREFIX};${GFLAGS_HOME}") + endif() + + if (PROTOBUF_VENDORED) + add_dependencies(grpc_dependencies protobuf_ep) + else() + set(GRPC_CMAKE_PREFIX "${GRPC_CMAKE_PREFIX};${PROTOBUF_HOME}") + endif() + # ZLIB is never vendored + if(NOT "${ZLIB_HOME}" STREQUAL "") + set(GRPC_CMAKE_PREFIX "${GRPC_CMAKE_PREFIX};${ZLIB_HOME}") + endif() + + if (RAPIDJSON_VENDORED) + add_dependencies(grpc_dependencies rapidjson_ep) + endif() + + # Yuck, see https://stackoverflow.com/a/45433229/776560 + string(REPLACE ";" "|" GRPC_PREFIX_PATH_ALT_SEP "${GRPC_CMAKE_PREFIX}") + + set(GRPC_CMAKE_ARGS + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DCMAKE_PREFIX_PATH="${GRPC_PREFIX_PATH_ALT_SEP}" + "-DgRPC_CARES_PROVIDER=package" + "-DgRPC_GFLAGS_PROVIDER=package" + "-DgRPC_PROTOBUF_PROVIDER=package" + "-DgRPC_SSL_PROVIDER=package" + "-DgRPC_ZLIB_PROVIDER=package" + "-DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}" + "-DCMAKE_C_FLAGS=${EP_C_FLAGS}" + "-DCMAKE_INSTALL_PREFIX=${GRPC_PREFIX}" + -DCMAKE_INSTALL_LIBDIR=lib + -DBUILD_SHARED_LIBS=OFF) + + # XXX the gRPC git checkout is huge and takes a long time + # Ideally, we should be able to use the tarballs, but they don't contain + # vendored dependencies such as c-ares... 
ExternalProject_Add(grpc_ep - GIT_REPOSITORY "https://github.com/grpc/grpc" - GIT_TAG ${GRPC_VERSION} - BUILD_BYPRODUCTS "${GRPC_STATIC_LIBRARY_GPR}" "${GRPC_STATIC_LIBRARY_GRPC}" "${GRPC_STATIC_LIBRARY_GRPCPP}" - ${GRPC_BUILD_BYPRODUCTS} - ${EP_LOG_OPTIONS} + URL ${GRPC_SOURCE_URL} + LIST_SEPARATOR | + BUILD_BYPRODUCTS + ${GRPC_STATIC_LIBRARY_GPR} + ${GRPC_STATIC_LIBRARY_GRPC} + ${GRPC_STATIC_LIBRARY_GRPCPP} + ${GRPC_STATIC_LIBRARY_ADDRESS_SORTING} + ${GRPC_CPP_PLUGIN} CMAKE_ARGS ${GRPC_CMAKE_ARGS} ${EP_LOG_OPTIONS}) - include_directories(SYSTEM ${GRPC_INCLUDE_DIR}) + + add_dependencies(grpc_ep grpc_dependencies) + + set(GPR_STATIC_LIB "${GRPC_STATIC_LIBRARY_GPR}") + set(GRPC_STATIC_LIB "${GRPC_STATIC_LIBRARY_GRPC}") + set(GRPCPP_STATIC_LIB "${GRPC_STATIC_LIBRARY_GRPCPP}") + set(GRPC_ADDRESS_SORTING_STATIC_LIB "${GRPC_STATIC_LIBRARY_ADDRESS_SORTING}") + + add_dependencies(grpc grpc_ep) + add_dependencies(toolchain grpc) else() - find_package(gRPC CONFIG REQUIRED) + find_package(gRPC REQUIRED) set(GRPC_VENDORED 0) endif() - get_property(GPR_STATIC_LIB TARGET gRPC::gpr PROPERTY LOCATION) + if ("${GRPC_CPP_PLUGIN}" STREQUAL "") + message(SEND_ERROR "Please set GRPC_CPP_PLUGIN.") + endif() + + include_directories(SYSTEM ${GRPC_INCLUDE_DIR}) + ADD_THIRDPARTY_LIB(grpc_gpr STATIC_LIB ${GPR_STATIC_LIB}) - get_property(GRPC_STATIC_LIB TARGET gRPC::grpc_unsecure PROPERTY LOCATION) ADD_THIRDPARTY_LIB(grpc_grpc STATIC_LIB ${GRPC_STATIC_LIB}) - get_property(GRPCPP_STATIC_LIB TARGET gRPC::grpc++_unsecure PROPERTY LOCATION) ADD_THIRDPARTY_LIB(grpc_grpcpp STATIC_LIB ${GRPCPP_STATIC_LIB}) - get_property(GRPC_ADDRESS_SORTING_STATIC_LIB - TARGET gRPC::address_sorting PROPERTY LOCATION) ADD_THIRDPARTY_LIB(grpc_address_sorting STATIC_LIB ${GRPC_ADDRESS_SORTING_STATIC_LIB}) - # XXX(wesm): relying on vendored c-ares provided by gRPC for the time being - get_property(CARES_STATIC_LIB TARGET c-ares::cares_static PROPERTY LOCATION) ADD_THIRDPARTY_LIB(cares STATIC_LIB ${CARES_STATIC_LIB}) endif() @@ -1264,7 +1469,7 @@ endif() if (ARROW_ORC) # orc if ("${ORC_HOME}" STREQUAL "") - set(ORC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/orc_ep-install") + set(ORC_PREFIX "${THIRDPARTY_PREFIX}") set(ORC_HOME "${ORC_PREFIX}") set(ORC_INCLUDE_DIR "${ORC_PREFIX}/include") set(ORC_STATIC_LIB "${ORC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}orc${CMAKE_STATIC_LIBRARY_SUFFIX}") @@ -1281,19 +1486,19 @@ if (ARROW_ORC) # Since LZ4 isn't installed, the header file is in ${LZ4_HOME}/lib instead of # ${LZ4_HOME}/include, which forces us to specify the include directory # manually as well. 
- set (ORC_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DCMAKE_INSTALL_PREFIX=${ORC_PREFIX} - -DCMAKE_CXX_FLAGS=${ORC_CMAKE_CXX_FLAGS} - -DBUILD_LIBHDFSPP=OFF - -DBUILD_JAVA=OFF - -DBUILD_TOOLS=OFF - -DBUILD_CPP_TESTS=OFF - -DINSTALL_VENDORED_LIBS=OFF - -DPROTOBUF_HOME=${PROTOBUF_HOME} - -DLZ4_HOME=${LZ4_HOME} - -DLZ4_INCLUDE_DIR=${LZ4_INCLUDE_DIR} - -DSNAPPY_HOME=${SNAPPY_HOME} - -DZLIB_HOME=${ZLIB_HOME}) + set (ORC_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} + "-DCMAKE_INSTALL_PREFIX=${ORC_PREFIX}" + -DCMAKE_CXX_FLAGS=${ORC_CMAKE_CXX_FLAGS} + -DBUILD_LIBHDFSPP=OFF + -DBUILD_JAVA=OFF + -DBUILD_TOOLS=OFF + -DBUILD_CPP_TESTS=OFF + -DINSTALL_VENDORED_LIBS=OFF + -DPROTOBUF_HOME=${PROTOBUF_HOME} + -DLZ4_HOME=${LZ4_HOME} + -DLZ4_INCLUDE_DIR=${LZ4_INCLUDE_DIR} + -DSNAPPY_HOME=${SNAPPY_HOME} + -DZLIB_HOME=${ZLIB_HOME}) ExternalProject_Add(orc_ep URL ${ORC_SOURCE_URL} @@ -1301,6 +1506,8 @@ if (ARROW_ORC) CMAKE_ARGS ${ORC_CMAKE_ARGS} ${EP_LOG_OPTIONS}) + add_dependencies(toolchain orc_ep) + set(ORC_VENDORED 1) add_dependencies(orc_ep ${ZLIB_LIBRARY}) if (LZ4_VENDORED) @@ -1326,7 +1533,6 @@ if (ARROW_ORC) if (ORC_VENDORED) add_dependencies(orc_static orc_ep) endif() - endif() # ---------------------------------------------------------------------- @@ -1338,28 +1544,25 @@ if (ARROW_WITH_THRIFT) find_package(Thrift) if (NOT THRIFT_FOUND) - set(THRIFT_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/thrift_ep/src/thrift_ep-install") + set(THRIFT_PREFIX "${THIRDPARTY_PREFIX}") set(THRIFT_HOME "${THRIFT_PREFIX}") set(THRIFT_INCLUDE_DIR "${THRIFT_PREFIX}/include") set(THRIFT_COMPILER "${THRIFT_PREFIX}/bin/thrift") - set(THRIFT_CMAKE_ARGS "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}" - "-DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}" - "-DCMAKE_C_FLAGS=${EP_C_FLAGS}" - "-DCMAKE_INSTALL_PREFIX=${THRIFT_PREFIX}" - "-DCMAKE_INSTALL_RPATH=${THRIFT_PREFIX}/lib" - "-DBUILD_SHARED_LIBS=OFF" - "-DBUILD_TESTING=OFF" - "-DBUILD_EXAMPLES=OFF" - "-DBUILD_TUTORIALS=OFF" - "-DWITH_QT4=OFF" - "-DWITH_C_GLIB=OFF" - "-DWITH_JAVA=OFF" - "-DWITH_PYTHON=OFF" - "-DWITH_HASKELL=OFF" - "-DWITH_CPP=ON" - "-DWITH_STATIC_LIB=ON" - "-DWITH_LIBEVENT=OFF" - ) + set(THRIFT_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} + "-DCMAKE_INSTALL_PREFIX=${THRIFT_PREFIX}" + -DCMAKE_INSTALL_RPATH=${THRIFT_PREFIX}/lib + -DBUILD_SHARED_LIBS=OFF + -DBUILD_TESTING=OFF + -DBUILD_EXAMPLES=OFF + -DBUILD_TUTORIALS=OFF + -DWITH_QT4=OFF + -DWITH_C_GLIB=OFF + -DWITH_JAVA=OFF + -DWITH_PYTHON=OFF + -DWITH_HASKELL=OFF + -DWITH_CPP=ON + -DWITH_STATIC_LIB=ON + -DWITH_LIBEVENT=OFF) # Thrift also uses boost. Forward important boost settings if there were ones passed. if (DEFINED BOOST_ROOT) @@ -1395,7 +1598,7 @@ if (NOT THRIFT_FOUND) if (MSVC) set(WINFLEXBISON_VERSION 2.4.9) - set(WINFLEXBISON_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/winflexbison_ep/src/winflexbison_ep-install") + set(WINFLEXBISON_PREFIX "${THIRDPARTY_PREFIX}") ExternalProject_Add(winflexbison_ep URL https://github.com/lexxmark/winflexbison/releases/download/v.${WINFLEXBISON_VERSION}/win_flex_bison-${WINFLEXBISON_VERSION}.zip URL_HASH MD5=a2e979ea9928fbf8567e995e9c0df765 @@ -1413,10 +1616,31 @@ if (NOT THRIFT_FOUND) "-DWITH_PLUGIN=OFF" ${THRIFT_CMAKE_ARGS}) elseif (APPLE) - if (DEFINED BISON_EXECUTABLE) - set(THRIFT_CMAKE_ARGS "-DBISON_EXECUTABLE=${BISON_EXECUTABLE}" - ${THRIFT_CMAKE_ARGS}) + # Some other process always resets BISON_EXECUTABLE to the system default, + # thus we use our own variable here. 
+ if (NOT DEFINED THRIFT_BISON_EXECUTABLE) + find_package(BISON 2.5.1) + + # In the case where we cannot find a system-wide installation, look for + # homebrew and ask for its bison installation. + if (NOT BISON_FOUND) + find_program(BREW_BIN brew) + if (BREW_BIN) + execute_process( + COMMAND ${BREW_BIN} --prefix bison + OUTPUT_VARIABLE BISON_PREFIX + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + set(BISON_EXECUTABLE "${BISON_PREFIX}/bin/bison") + find_package(BISON 2.5.1) + set(THRIFT_BISON_EXECUTABLE "${BISON_EXECUTABLE}") + endif() + else() + set(THRIFT_BISON_EXECUTABLE "${BISON_EXECUTABLE}") + endif() endif() + set(THRIFT_CMAKE_ARGS "-DBISON_EXECUTABLE=${THRIFT_BISON_EXECUTABLE}" + ${THRIFT_CMAKE_ARGS}) endif() ExternalProject_Add(thrift_ep @@ -1454,12 +1678,12 @@ endif() # ARROW_HIVESERVER2 if (ARROW_USE_GLOG) if("${GLOG_HOME}" STREQUAL "") - set(GLOG_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/glog_ep-prefix/src/glog_ep") - set(GLOG_INCLUDE_DIR "${GLOG_BUILD_DIR}/include") - set(GLOG_STATIC_LIB "${GLOG_BUILD_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}glog${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(GLOG_PREFIX "${THIRDPARTY_PREFIX}") + set(GLOG_INCLUDE_DIR "${GLOG_PREFIX}/include") + set(GLOG_STATIC_LIB "${GLOG_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}glog${CMAKE_STATIC_LIBRARY_SUFFIX}") set(GLOG_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") set(GLOG_CMAKE_C_FLAGS "${EP_C_FLAGS} -fPIC") - if (PTHREAD_LIBRARY) + if (Threads::Threads) set(GLOG_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -pthread") set(GLOG_CMAKE_C_FLAGS "${EP_C_FLAGS} -fPIC -pthread") endif() @@ -1471,14 +1695,14 @@ if (ARROW_USE_GLOG) set(GLOG_CMAKE_CXX_FLAGS "${GLOG_CMAKE_CXX_FLAGS} -mmacosx-version-min=10.9") endif() - set(GLOG_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DCMAKE_INSTALL_PREFIX=${GLOG_BUILD_DIR} - -DBUILD_SHARED_LIBS=OFF - -DBUILD_TESTING=OFF - -DWITH_GFLAGS=OFF - -DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${GLOG_CMAKE_CXX_FLAGS} - -DCMAKE_C_FLAGS_${UPPERCASE_BUILD_TYPE}=${GLOG_CMAKE_C_FLAGS} - -DCMAKE_CXX_FLAGS=${GLOG_CMAKE_CXX_FLAGS}) + set(GLOG_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} + "-DCMAKE_INSTALL_PREFIX=${GLOG_PREFIX}" + -DBUILD_SHARED_LIBS=OFF + -DBUILD_TESTING=OFF + -DWITH_GFLAGS=OFF + -DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${GLOG_CMAKE_CXX_FLAGS} + -DCMAKE_C_FLAGS_${UPPERCASE_BUILD_TYPE}=${GLOG_CMAKE_C_FLAGS} + -DCMAKE_CXX_FLAGS=${GLOG_CMAKE_CXX_FLAGS}) message(STATUS "Glog version: ${GLOG_VERSION}") ExternalProject_Add(glog_ep URL ${GLOG_SOURCE_URL} @@ -1497,10 +1721,14 @@ if (ARROW_USE_GLOG) message(STATUS "Glog static library: ${GLOG_STATIC_LIB}") include_directories(SYSTEM ${GLOG_INCLUDE_DIR}) - ADD_THIRDPARTY_LIB(glog - STATIC_LIB ${GLOG_STATIC_LIB}) if (GLOG_VENDORED) + ADD_THIRDPARTY_LIB(glog + STATIC_LIB ${GLOG_STATIC_LIB}) add_dependencies(glog_static glog_ep) + else() + ADD_THIRDPARTY_LIB(glog + STATIC_LIB ${GLOG_STATIC_LIB} + DEPS gflags_static) endif() endif() diff --git a/cpp/cmake_modules/san-config.cmake b/cpp/cmake_modules/san-config.cmake index f2de9cf1f7553..22a9b0c8098a0 100644 --- a/cpp/cmake_modules/san-config.cmake +++ b/cpp/cmake_modules/san-config.cmake @@ -22,19 +22,6 @@ if (${ARROW_USE_ASAN}) ("${COMPILER_FAMILY}" STREQUAL "gcc" AND "${COMPILER_VERSION}" VERSION_GREATER "4.8"))) message(SEND_ERROR "Cannot use ASAN without clang or gcc >= 4.8") endif() - - # If UBSAN is also enabled, and we're on clang < 3.5, ensure static linking is - # enabled. 
Otherwise, we run into https://llvm.org/bugs/show_bug.cgi?id=18211 - if("${ARROW_USE_UBSAN}" AND - "${COMPILER_FAMILY}" STREQUAL "clang" AND - "${COMPILER_VERSION}" VERSION_LESS "3.5") - if("${ARROW_LINK}" STREQUAL "a") - message("Using static linking for ASAN+UBSAN build") - set(ARROW_LINK "s") - elseif("${ARROW_LINK}" STREQUAL "d") - message(SEND_ERROR "Cannot use dynamic linking when ASAN and UBSAN are both enabled") - endif() - endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -DADDRESS_SANITIZER") endif() @@ -49,7 +36,7 @@ if (${ARROW_USE_UBSAN}) ("${COMPILER_FAMILY}" STREQUAL "gcc" AND "${COMPILER_VERSION}" VERSION_GREATER "4.9"))) message(SEND_ERROR "Cannot use UBSAN without clang or gcc >= 4.9") endif() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr -fno-sanitize-recover") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr -fno-sanitize-recover=all") endif () # Flag to enable thread sanitizer (clang or gcc 4.8) @@ -101,14 +88,7 @@ if ("${ARROW_USE_UBSAN}" OR "${ARROW_USE_ASAN}" OR "${ARROW_USE_TSAN}") # GCC 4.8 and 4.9 (latest as of this writing) don't allow you to specify a # sanitizer blacklist. if("${COMPILER_FAMILY}" STREQUAL "clang") - # Require clang 3.4 or newer; clang 3.3 has issues with TSAN and pthread - # symbol interception. - if("${COMPILER_VERSION}" VERSION_LESS "3.4") - message(SEND_ERROR "Must use clang 3.4 or newer to run a sanitizer build." - " Detected unsupported version ${COMPILER_VERSION}." - " Try using clang from $NATIVE_TOOLCHAIN/.") - endif() - add_definitions("-fsanitize-blacklist=${BUILD_SUPPORT_DIR}/sanitize-blacklist.txt") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize-blacklist=${BUILD_SUPPORT_DIR}/sanitize-blacklist.txt") else() message(WARNING "GCC does not support specifying a sanitizer blacklist. Known sanitizer check failures will not be suppressed.") endif() diff --git a/cpp/examples/arrow/CMakeLists.txt b/cpp/examples/arrow/CMakeLists.txt new file mode 100644 index 0000000000000..6ecb537ad9787 --- /dev/null +++ b/cpp/examples/arrow/CMakeLists.txt @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ADD_ARROW_EXAMPLE(row-wise-conversion-example) diff --git a/cpp/examples/arrow/row-wise-conversion-example.cc b/cpp/examples/arrow/row-wise-conversion-example.cc new file mode 100644 index 0000000000000..db8c28753dbe6 --- /dev/null +++ b/cpp/examples/arrow/row-wise-conversion-example.cc @@ -0,0 +1,190 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cassert>
+#include <iostream>
+#include <vector>
+
+#include <arrow/api.h>
+
+using arrow::DoubleBuilder;
+using arrow::Int64Builder;
+using arrow::ListBuilder;
+
+// While we want to use columnar data structures to build efficient operations, we
+// often receive data in a row-wise fashion from other systems. In the following,
+// we want to give a brief introduction to the classes provided by Apache Arrow by
+// showing how to transform row-wise data into a columnar table.
+//
+// The data in this example is stored in the following struct:
+struct data_row {
+  int64_t id;
+  double cost;
+  std::vector<double> cost_components;
+};
+
+// Transforming a vector of structs into a columnar Table.
+//
+// The final representation should be an `arrow::Table` which in turn is made up of
+// an `arrow::Schema` and a list of `arrow::Column`. An `arrow::Column` is again a
+// named collection of one or more `arrow::Array` instances. As the first step, we
+// will iterate over the data and build up the arrays incrementally. For this task,
+// we provide `arrow::ArrayBuilder` classes that help in the construction of the
+// final `arrow::Array` instances.
+//
+// For each type, Arrow has a specially typed builder class. For the primitive
+// values `id` and `cost` we can use the respective `arrow::Int64Builder` and
+// `arrow::DoubleBuilder`. For the `cost_components` vector, we need to have two
+// builders, a top-level `arrow::ListBuilder` that builds the array of offsets and
+// a nested `arrow::DoubleBuilder` that constructs the underlying values array that
+// is referenced by the offsets in the former array.
+arrow::Status VectorToColumnarTable(const std::vector<data_row>& rows,
+                                    std::shared_ptr<arrow::Table>* table) {
+  // The builders are more efficient using
+  // arrow::jemalloc::MemoryPool::default_pool() as this can increase the size of
+  // the underlying memory regions in-place. At the moment, arrow::jemalloc is only
+  // supported on Unix systems, not Windows.
+  arrow::MemoryPool* pool = arrow::default_memory_pool();
+
+  Int64Builder id_builder(pool);
+  DoubleBuilder cost_builder(pool);
+  ListBuilder components_builder(pool, std::make_shared<DoubleBuilder>(pool));
+  // The following builder is owned by components_builder.
+  DoubleBuilder& cost_components_builder =
+      *(static_cast<DoubleBuilder*>(components_builder.value_builder()));
+
+  // Now we can loop over our existing data and insert it into the builders. The
+  // `Append` calls here may fail (e.g. we cannot allocate enough additional memory).
+  // Thus we need to check their return values. For more information on these values,
+  // check the documentation about `arrow::Status`.
+  for (const data_row& row : rows) {
+    ARROW_RETURN_NOT_OK(id_builder.Append(row.id));
+    ARROW_RETURN_NOT_OK(cost_builder.Append(row.cost));
+
+    // Indicate the start of a new list row. This will memorise the current
+    // offset in the values builder.
+    ARROW_RETURN_NOT_OK(components_builder.Append());
+    // Store the actual values. The final nullptr argument tells the underlying
+    // builder that all added values are valid, i.e. non-null.
+    ARROW_RETURN_NOT_OK(cost_components_builder.AppendValues(row.cost_components.data(),
+                                                             row.cost_components.size()));
+  }
+
+  // At the end, we finalise the arrays, declare the (type) schema and combine them
+  // into a single `arrow::Table`:
+  std::shared_ptr<arrow::Array> id_array;
+  ARROW_RETURN_NOT_OK(id_builder.Finish(&id_array));
+  std::shared_ptr<arrow::Array> cost_array;
+  ARROW_RETURN_NOT_OK(cost_builder.Finish(&cost_array));
+  // No need to invoke cost_components_builder.Finish because it is implied by
+  // the parent builder's Finish invocation.
+  std::shared_ptr<arrow::Array> cost_components_array;
+  ARROW_RETURN_NOT_OK(components_builder.Finish(&cost_components_array));
+
+  std::vector<std::shared_ptr<arrow::Field>> schema_vector = {
+      arrow::field("id", arrow::int64()), arrow::field("cost", arrow::float64()),
+      arrow::field("cost_components", arrow::list(arrow::float64()))};
+
+  auto schema = std::make_shared<arrow::Schema>(schema_vector);
+
+  // The final `table` variable is the one we can then pass on to other functions
+  // that can consume Apache Arrow memory structures. This object has ownership of
+  // all referenced data, thus we don't have to care about undefined references once
+  // we leave the scope of the function building the table and its underlying arrays.
+  *table = arrow::Table::Make(schema, {id_array, cost_array, cost_components_array});
+
+  return arrow::Status::OK();
+}
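+
+// For the sample rows constructed in main() below, this function yields an id
+// array [1, 2, 3], a cost array [1.0, 2.0, 3.0], and a cost_components list
+// array with offsets [0, 1, 3, 6] over the flat values
+// [1.0, 1.0, 2.0, 1.0, 2.0, 3.0].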
+
+arrow::Status ColumnarTableToVector(const std::shared_ptr<arrow::Table>& table,
+                                    std::vector<data_row>* rows) {
+  // To convert an Arrow table back into the same row-wise representation as in the
+  // above section, we first will check that the table conforms to our expected
+  // schema and then will build up the vector of rows incrementally.
+  //
+  // To check whether the table is as expected, we can rely solely on its schema.
+  std::vector<std::shared_ptr<arrow::Field>> schema_vector = {
+      arrow::field("id", arrow::int64()), arrow::field("cost", arrow::float64()),
+      arrow::field("cost_components", arrow::list(arrow::float64()))};
+  auto expected_schema = std::make_shared<arrow::Schema>(schema_vector);
+
+  if (!expected_schema->Equals(*table->schema())) {
+    // The table doesn't have the expected schema, thus we cannot directly
+    // convert it to our target representation.
+    return arrow::Status::Invalid("Schemas are not matching!");
+  }
+
+  // As we have ensured that the table has the expected structure, we can unpack the
+  // underlying arrays. For the primitive columns `id` and `cost` we can use the high
+  // level functions to get the values whereas for the nested column
+  // `cost_components` we need to access the C-pointer to the data to copy its
+  // contents into the resulting `std::vector<double>`. Here we need to be careful to
+  // also add the offset to the pointer. This offset is needed to enable zero-copy
+  // slicing operations. While this could be adjusted automatically for double
+  // arrays, this cannot be done for the accompanying bitmap as often the slicing
+  // border would be inside a byte.
+
+  auto ids =
+      std::static_pointer_cast<arrow::Int64Array>(table->column(0)->data()->chunk(0));
+  auto costs =
+      std::static_pointer_cast<arrow::DoubleArray>(table->column(1)->data()->chunk(0));
+  auto cost_components =
+      std::static_pointer_cast<arrow::ListArray>(table->column(2)->data()->chunk(0));
+  auto cost_components_values =
+      std::static_pointer_cast<arrow::DoubleArray>(cost_components->values());
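+  // Note: each table column is a chunked array; taking chunk(0) alone is
+  // sufficient here only because the table was built above from single,
+  // unchunked arrays.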
+  // To enable zero-copy slices, the native values pointer might need to account
+  // for this slicing offset. This is not needed for the higher level functions
+  // like Value(…) that already account for this offset internally.
+  const double* ccv_ptr = cost_components_values->data()->GetValues<double>(1);
+
+  for (int64_t i = 0; i < table->num_rows(); i++) {
+    // Another simplification in this example is that we assume that there are
+    // no null entries, e.g. each row is filled with valid values.
+    int64_t id = ids->Value(i);
+    double cost = costs->Value(i);
+    const double* first = ccv_ptr + cost_components->value_offset(i);
+    const double* last = ccv_ptr + cost_components->value_offset(i + 1);
+    std::vector<double> components_vec(first, last);
+    rows->push_back({id, cost, components_vec});
+  }
+
+  return arrow::Status::OK();
+}
+
+#define EXIT_ON_FAILURE(expr)                      \
+  do {                                             \
+    arrow::Status status_ = (expr);                \
+    if (!status_.ok()) {                           \
+      std::cerr << status_.message() << std::endl; \
+      return EXIT_FAILURE;                         \
+    }                                              \
+  } while (0);
+
+int main(int argc, char** argv) {
+  std::vector<data_row> rows = {
+      {1, 1.0, {1.0}}, {2, 2.0, {1.0, 2.0}}, {3, 3.0, {1.0, 2.0, 3.0}}};
+
+  std::shared_ptr<arrow::Table> table;
+  EXIT_ON_FAILURE(VectorToColumnarTable(rows, &table));
+
+  std::vector<data_row> expected_rows;
+  EXIT_ON_FAILURE(ColumnarTableToVector(table, &expected_rows));
+
+  assert(rows.size() == expected_rows.size());
+
+  return EXIT_SUCCESS;
+}
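The loop above deliberately assumes fully valid data. As a rough sketch (not part
of the patch), the same extraction can be made null-aware by consulting each
array's validity bitmap via IsNull(); the kMissingCost sentinel below is a
hypothetical choice for this illustration, not an Arrow convention:

    // Null-aware variant of the row-extraction loop, assuming the same
    // unpacked arrays (ids, costs, cost_components, ccv_ptr) as above.
    const double kMissingCost = -1.0;  // hypothetical sentinel for null costs
    for (int64_t i = 0; i < table->num_rows(); i++) {
      int64_t id = ids->IsNull(i) ? -1 : ids->Value(i);
      double cost = costs->IsNull(i) ? kMissingCost : costs->Value(i);
      std::vector<double> components_vec;
      if (!cost_components->IsNull(i)) {
        const double* first = ccv_ptr + cost_components->value_offset(i);
        const double* last = ccv_ptr + cost_components->value_offset(i + 1);
        components_vec.assign(first, last);
      }
      rows->push_back({id, cost, components_vec});
    }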
diff --git a/cpp/examples/parquet/CMakeLists.txt b/cpp/examples/parquet/CMakeLists.txt
new file mode 100644
index 0000000000000..db172a2534f37
--- /dev/null
+++ b/cpp/examples/parquet/CMakeLists.txt
@@ -0,0 +1,31 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+add_executable(parquet-low-level-example low-level-api/reader-writer.cc)
+add_executable(parquet-low-level-example2 low-level-api/reader-writer2.cc)
+target_include_directories(parquet-low-level-example PRIVATE low-level-api/)
+target_include_directories(parquet-low-level-example2 PRIVATE low-level-api/)
+target_link_libraries(parquet-low-level-example parquet_static)
+target_link_libraries(parquet-low-level-example2 parquet_static)
+
+add_executable(parquet-arrow-example parquet-arrow/reader-writer.cc)
+target_link_libraries(parquet-arrow-example parquet_shared)
+
+add_dependencies(parquet
+  parquet-low-level-example
+  parquet-low-level-example2
+  parquet-arrow-example)
diff --git a/cpp/examples/parquet/low-level-api/CMakeLists.txt b/cpp/examples/parquet/low-level-api/CMakeLists.txt
deleted file mode 100644
index 26e8220c0d057..0000000000000
--- a/cpp/examples/parquet/low-level-api/CMakeLists.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-if (PARQUET_BUILD_EXAMPLES)
-  add_executable(parquet-reader-writer reader-writer.cc)
-  add_executable(parquet-reader-writer2 reader-writer2.cc)
-  target_include_directories(parquet-reader-writer PRIVATE .)
-  target_include_directories(parquet-reader-writer2 PRIVATE .)
-  target_link_libraries(parquet-reader-writer parquet_static)
-  target_link_libraries(parquet-reader-writer2 parquet_static)
-
-  add_dependencies(parquet
-    parquet-reader-writer
-    parquet-reader-writer2)
-endif()
diff --git a/cpp/examples/parquet/parquet-arrow/CMakeLists.txt b/cpp/examples/parquet/parquet-arrow/CMakeLists.txt
index 892ec92a591ed..915930ec228e1 100644
--- a/cpp/examples/parquet/parquet-arrow/CMakeLists.txt
+++ b/cpp/examples/parquet/parquet-arrow/CMakeLists.txt
@@ -32,15 +32,11 @@ set(CMAKE_CXX_STANDARD 11)
 # We require a C++11 compliant compiler
 set(CMAKE_CXX_STANDARD_REQUIRED ON)

-# First search the packages in the system. If they are not found, use CMake's
-# ExternalProject mechanism to build them locally.
+# Look for installed packages in the system
 find_package(Arrow)
 find_package(Parquet)

 include_directories(SYSTEM ${ARROW_INCLUDE_DIR} ${PARQUET_INCLUDE_DIR})

-add_executable(parquet-arrow-reader-writer src/reader-writer.cc)
-target_link_libraries(parquet-arrow-reader-writer ${PARQUET_SHARED_LIB} ${ARROW_SHARED_LIB})
-if (ARROW_VENDORED)
-  add_dependencies(parquet-arrow-reader-writer arrow_ep)
-endif()
+add_executable(parquet-arrow-example reader-writer.cc)
+target_link_libraries(parquet-arrow-example ${PARQUET_SHARED_LIB} ${ARROW_SHARED_LIB})
diff --git a/cpp/examples/parquet/parquet-arrow/src/reader-writer.cc b/cpp/examples/parquet/parquet-arrow/reader-writer.cc
similarity index 98%
rename from cpp/examples/parquet/parquet-arrow/src/reader-writer.cc
rename to cpp/examples/parquet/parquet-arrow/reader-writer.cc
index 8154d7adef2ad..a5f928b6d4f69 100644
--- a/cpp/examples/parquet/parquet-arrow/src/reader-writer.cc
+++ b/cpp/examples/parquet/parquet-arrow/reader-writer.cc
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.

+#include <arrow/api.h>
 #include <arrow/io/api.h>
 #include <parquet/arrow/reader.h>
 #include <parquet/arrow/writer.h>
@@ -99,7 +100,7 @@ void read_single_column() {
   std::unique_ptr<parquet::arrow::FileReader> reader;
   PARQUET_THROW_NOT_OK(
       parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader));
-  std::shared_ptr<arrow::Array> array;
+  std::shared_ptr<arrow::ChunkedArray> array;
   PARQUET_THROW_NOT_OK(reader->ReadColumn(0, &array));
   PARQUET_THROW_NOT_OK(arrow::PrettyPrint(*array, 4, &std::cout));
   std::cout << std::endl;
@@ -118,7 +119,7 @@ void read_single_column_chunk() {
   std::unique_ptr<parquet::arrow::FileReader> reader;
   PARQUET_THROW_NOT_OK(
       parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader));
-  std::shared_ptr<arrow::Array> array;
+  std::shared_ptr<arrow::ChunkedArray> array;
   PARQUET_THROW_NOT_OK(reader->RowGroup(0)->Column(0)->Read(&array));
   PARQUET_THROW_NOT_OK(arrow::PrettyPrint(*array, 4, &std::cout));
   std::cout << std::endl;
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index a56079fb2a271..1dba5898c0a7a 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -15,10 +15,67 @@
 # specific language governing permissions and limitations
 # under the License.

+add_custom_target(arrow-all)
+add_custom_target(arrow)
+add_custom_target(arrow-benchmarks)
+add_custom_target(arrow-tests)
+add_dependencies(arrow-all arrow arrow-tests arrow-benchmarks)
+
+# Adding unit tests part of the "arrow" portion of the test suite
+function(ADD_ARROW_TEST REL_TEST_NAME)
+  set(options)
+  set(one_value_args PREFIX)
+  set(multi_value_args LABELS)
+  cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN})
+
+  if (ARG_PREFIX)
+    set(PREFIX ${ARG_PREFIX})
+  else()
+    set(PREFIX "arrow")
+  endif()
+
+  if (ARG_LABELS)
+    set(LABELS ${ARG_LABELS})
+  else()
+    set(LABELS "arrow-tests")
+  endif()
+
+  ADD_TEST_CASE(${REL_TEST_NAME}
+    PREFIX ${PREFIX}
+    LABELS ${LABELS}
+    ${ARG_UNPARSED_ARGUMENTS})
+endfunction()
+
+function(ADD_ARROW_BENCHMARK REL_TEST_NAME)
+  set(options)
+  set(one_value_args PREFIX)
+  set(multi_value_args)
+  cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN})
+  if (ARG_PREFIX)
+    set(PREFIX ${ARG_PREFIX})
+  else()
+    set(PREFIX "arrow")
+  endif()
+  ADD_BENCHMARK(${REL_TEST_NAME}
+    PREFIX ${PREFIX}
+    LABELS "arrow-benchmarks"
+    ${ARG_UNPARSED_ARGUMENTS})
+endfunction()
+
 set(ARROW_SRCS
   array.cc
-  buffer.cc
+  builder.cc
+  array/builder_adaptive.cc
+  array/builder_base.cc
+  array/builder_binary.cc
+  array/builder_decimal.cc
+  array/builder_dict.cc
+  array/builder_nested.cc
+  array/builder_primitive.cc
+  array/builder_union.cc
+
+  buffer.cc
   compare.cc
   memory_pool.cc
   pretty_print.cc
@@ -27,6 +84,7 @@ set(ARROW_SRCS
   table.cc
   table_builder.cc
   tensor.cc
+  sparse_tensor.cc
   type.cc
   visitor.cc

@@ -44,6 +102,7 @@ set(ARROW_SRCS
   io/memory.cc
   io/readahead.cc

+  util/basic_decimal.cc
   util/bit-util.cc
   util/compression.cc
   util/cpu-info.cc
@@ -54,7 +113,10 @@ set(ARROW_SRCS
   util/key_value_metadata.cc
   util/task-group.cc
   util/thread-pool.cc
+  util/trie.cc
   util/utf8.cc
+
+  vendored/datetime/tz.cpp
 )

 if ("${COMPILER_FAMILY}" STREQUAL "clang")
@@ -75,8 +137,8 @@ if (ARROW_COMPUTE)
   )
 endif()

-if (ARROW_GPU)
-  # IPC extensions required to build the GPU library
+if (ARROW_CUDA)
+  # IPC extensions required to build the CUDA library
   set(ARROW_IPC ON)
   add_subdirectory(gpu)
 endif()
@@ -139,6 +201,7 @@ if (ARROW_IPC)
     ipc/feather.cc
     ipc/json.cc
     ipc/json-internal.cc
+    ipc/json-simple.cc
     ipc/message.cc
     ipc/metadata-internal.cc
     ipc/reader.cc
@@ -167,7 +230,11 @@ ADD_ARROW_LIB(arrow
   SHARED_LINK_FLAGS ${ARROW_SHARED_LINK_FLAGS}
   SHARED_LINK_LIBS ${ARROW_LINK_LIBS}
   SHARED_PRIVATE_LINK_LIBS
${ARROW_SHARED_PRIVATE_LINK_LIBS} - STATIC_LINK_LIBS ${ARROW_STATIC_LINK_LIBS}) + STATIC_LINK_LIBS ${ARROW_STATIC_LINK_LIBS} + SHARED_INSTALL_INTERFACE_LIBS ${ARROW_SHARED_INSTALL_INTERFACE_LIBS} + STATIC_INSTALL_INTERFACE_LIBS ${ARROW_STATIC_INSTALL_INTERFACE_LIBS}) + +add_dependencies(arrow ${ARROW_LIBRARIES}) if (ARROW_BUILD_STATIC AND WIN32) target_compile_definitions(arrow_static PUBLIC ARROW_STATIC) @@ -178,8 +245,8 @@ if (ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS) ADD_ARROW_LIB(arrow_testing SOURCES test-util.cc OUTPUTS ARROW_TESTING_LIBRARIES - DEPENDENCIES gtest_static - SHARED_LINK_LIBS arrow_shared gtest_static + DEPENDENCIES ${GTEST_LIBRARY} + SHARED_LINK_LIBS arrow_shared ${GTEST_LIBRARY} STATIC_LINK_LIBS arrow_static) if (ARROW_BUILD_STATIC AND WIN32) @@ -196,43 +263,17 @@ find_package(Backtrace) foreach(LIB_TARGET ${ARROW_LIBRARIES}) target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_EXPORTING) - if (Backtrace_FOUND) + if (Backtrace_FOUND AND ARROW_WITH_BACKTRACE) target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_WITH_BACKTRACE) endif() endforeach() # Headers: top level -install(FILES - allocator.h - api.h - array.h - buffer.h - builder.h - compare.h - memory_pool.h - pretty_print.h - record_batch.h - status.h - stl.h - table.h - table_builder.h - tensor.h - type.h - type_fwd.h - type_traits.h - test-util.h - visitor.h - visitor_inline.h - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow") +ARROW_INSTALL_ALL_HEADERS("arrow") # pkg-config support -configure_file(arrow.pc.in - "${CMAKE_CURRENT_BINARY_DIR}/arrow.pc" - @ONLY) -install( - FILES "${CMAKE_CURRENT_BINARY_DIR}/arrow.pc" - DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig/") +ARROW_ADD_PKG_CONFIG("arrow") ####################################### # Unit tests @@ -252,10 +293,25 @@ ADD_ARROW_TEST(type-test) ADD_ARROW_TEST(table-test) ADD_ARROW_TEST(table_builder-test) ADD_ARROW_TEST(tensor-test) +ADD_ARROW_TEST(sparse_tensor-test) ADD_ARROW_BENCHMARK(builder-benchmark) ADD_ARROW_BENCHMARK(column-benchmark) +add_subdirectory(array) add_subdirectory(csv) add_subdirectory(io) add_subdirectory(util) +add_subdirectory(vendored) + +if(ARROW_FLIGHT) + add_subdirectory(flight) +endif() + +if(ARROW_PYTHON) + add_subdirectory(python) +endif() + +if(ARROW_HIVESERVER2) + add_subdirectory(dbi/hiveserver2) +endif() diff --git a/cpp/src/arrow/adapters/orc/adapter.cc b/cpp/src/arrow/adapters/orc/adapter.cc index de803d5ba6f03..01fc09afb0c92 100644 --- a/cpp/src/arrow/adapters/orc/adapter.cc +++ b/cpp/src/arrow/adapters/orc/adapter.cc @@ -206,11 +206,7 @@ Status GetArrowType(const liborc::Type* type, std::shared_ptr* out) { *out = union_(fields, type_codes); break; } - default: { - std::stringstream ss; - ss << "Unknown Orc type kind: " << kind; - return Status::Invalid(ss.str()); - } + default: { return Status::Invalid("Unknown Orc type kind: ", kind); } } return Status::OK(); } @@ -346,11 +342,9 @@ class ORCFileReader::Impl { } Status SelectStripe(liborc::RowReaderOptions* opts, int64_t stripe) { - if (stripe < 0 || stripe >= NumberOfStripes()) { - std::stringstream ss; - ss << "Out of bounds stripe: " << stripe; - return Status::Invalid(ss.str()); - } + ARROW_RETURN_IF(stripe < 0 || stripe >= NumberOfStripes(), + Status::Invalid("Out of bounds stripe: ", stripe)); + opts->range(stripes_[stripe].offset, stripes_[stripe].length); return Status::OK(); } @@ -359,9 +353,7 @@ class ORCFileReader::Impl { const std::vector& include_indices) { std::list include_indices_list; for (auto it = include_indices.begin(); it != 
include_indices.end(); ++it) {
-      if (*it < 0) {
-        return Status::Invalid("Negative field index");
-      }
+      ARROW_RETURN_IF(*it < 0, Status::Invalid("Negative field index"));
       include_indices_list.push_back(*it);
     }
     opts->includeTypes(include_indices_list);
@@ -455,9 +447,7 @@ class ORCFileReader::Impl {
       case liborc::DECIMAL:
         return AppendDecimalBatch(type, batch, offset, length, builder);
       default:
-        std::stringstream ss;
-        ss << "Not implemented type kind: " << kind;
-        return Status::NotImplemented(ss.str());
+        return Status::NotImplemented("Not implemented type kind: ", kind);
     }
   }
diff --git a/cpp/src/arrow/adapters/tensorflow/CMakeLists.txt b/cpp/src/arrow/adapters/tensorflow/CMakeLists.txt
index db4264b59ab63..5bb5b725910e3 100644
--- a/cpp/src/arrow/adapters/tensorflow/CMakeLists.txt
+++ b/cpp/src/arrow/adapters/tensorflow/CMakeLists.txt
@@ -15,7 +15,4 @@
 # specific language governing permissions and limitations
 # under the License.

-# Headers: top level
-install(FILES
-  convert.h
-  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow/adapters/tensorflow")
+ARROW_INSTALL_ALL_HEADERS("arrow/adapters/tensorflow")
diff --git a/cpp/src/arrow/allocator-test.cc b/cpp/src/arrow/allocator-test.cc
index cdffbd7e8494f..1a94467281dbc 100644
--- a/cpp/src/arrow/allocator-test.cc
+++ b/cpp/src/arrow/allocator-test.cc
@@ -17,6 +17,7 @@

 #include
 #include
+#include
 #include
 #include
diff --git a/cpp/src/arrow/allocator.h b/cpp/src/arrow/allocator.h
index 144ba575063a3..a02b8e64bb05a 100644
--- a/cpp/src/arrow/allocator.h
+++ b/cpp/src/arrow/allocator.h
@@ -29,6 +29,7 @@

 namespace arrow {

+/// \brief An STL allocator delegating allocations to an Arrow MemoryPool
 template <class T>
 class stl_allocator {
  public:
@@ -45,7 +46,9 @@ class stl_allocator {
     using other = stl_allocator<U>;
   };

+  /// \brief Construct an allocator from the default MemoryPool
   stl_allocator() noexcept : pool_(default_memory_pool()) {}
+  /// \brief Construct an allocator from the given MemoryPool
   explicit stl_allocator(MemoryPool* pool) noexcept : pool_(pool) {}

   template <class U>
@@ -86,9 +89,14 @@ class stl_allocator {
   MemoryPool* pool_;
 };

+/// \brief A MemoryPool implementation delegating allocations to an STL allocator
+///
+/// Note that STL allocators don't provide a resizing operation, and therefore
+/// any buffer resizes will do a full reallocation and copy.
 template <typename Allocator = std::allocator<uint8_t>>
 class STLMemoryPool : public MemoryPool {
  public:
+  /// \brief Construct a memory pool from the given allocator
   explicit STLMemoryPool(const Allocator& alloc) : alloc_(alloc) {}

   Status Allocate(int64_t size, uint8_t** out) override {
diff --git a/cpp/src/arrow/array-binary-test.cc b/cpp/src/arrow/array-binary-test.cc
index 4376695c68cba..6f938c82bfd0a 100644
--- a/cpp/src/arrow/array-binary-test.cc
+++ b/cpp/src/arrow/array-binary-test.cc
@@ -15,10 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
-#include #include #include -#include #include #include #include @@ -28,10 +26,14 @@ #include "arrow/array.h" #include "arrow/buffer.h" #include "arrow/builder.h" +#include "arrow/memory_pool.h" #include "arrow/status.h" #include "arrow/test-common.h" #include "arrow/test-util.h" #include "arrow/type.h" +#include "arrow/type_traits.h" +#include "arrow/util/bit-util.h" +#include "arrow/util/checked_cast.h" namespace arrow { @@ -676,4 +678,112 @@ TEST_F(TestStringArray, TestSliceEquality) { CheckSliceEquality(); } TEST_F(TestBinaryArray, LengthZeroCtor) { BinaryArray array(0, nullptr, nullptr); } +// ---------------------------------------------------------------------- +// ChunkedBinaryBuilder tests + +class TestChunkedBinaryBuilder : public ::testing::Test { + public: + void SetUp() {} + + void Init(int32_t chunksize) { + builder_.reset(new internal::ChunkedBinaryBuilder(chunksize)); + } + + protected: + std::unique_ptr builder_; +}; + +TEST_F(TestChunkedBinaryBuilder, BasicOperation) { + const int32_t chunksize = 1000; + Init(chunksize); + + const int elem_size = 10; + uint8_t buf[elem_size]; + + BinaryBuilder unchunked_builder; + + const int iterations = 1000; + for (int i = 0; i < iterations; ++i) { + random_bytes(elem_size, i, buf); + + ASSERT_OK(unchunked_builder.Append(buf, elem_size)); + ASSERT_OK(builder_->Append(buf, elem_size)); + } + + std::shared_ptr unchunked; + ASSERT_OK(unchunked_builder.Finish(&unchunked)); + + ArrayVector chunks; + ASSERT_OK(builder_->Finish(&chunks)); + + // This assumes that everything is evenly divisible + ArrayVector expected_chunks; + const int elems_per_chunk = chunksize / elem_size; + for (int i = 0; i < iterations / elems_per_chunk; ++i) { + expected_chunks.emplace_back(unchunked->Slice(i * elems_per_chunk, elems_per_chunk)); + } + + ASSERT_EQ(expected_chunks.size(), chunks.size()); + for (size_t i = 0; i < chunks.size(); ++i) { + AssertArraysEqual(*expected_chunks[i], *chunks[i]); + } +} + +TEST_F(TestChunkedBinaryBuilder, NoData) { + Init(1000); + + ArrayVector chunks; + ASSERT_OK(builder_->Finish(&chunks)); + + ASSERT_EQ(1, chunks.size()); + ASSERT_EQ(0, chunks[0]->length()); +} + +TEST_F(TestChunkedBinaryBuilder, LargeElements) { + Init(100); + + const int bufsize = 101; + uint8_t buf[bufsize]; + + const int iterations = 100; + for (int i = 0; i < iterations; ++i) { + random_bytes(bufsize, i, buf); + ASSERT_OK(builder_->Append(buf, bufsize)); + } + + ArrayVector chunks; + ASSERT_OK(builder_->Finish(&chunks)); + ASSERT_EQ(iterations, static_cast(chunks.size())); + + int64_t total_data_size = 0; + for (auto chunk : chunks) { + ASSERT_EQ(1, chunk->length()); + total_data_size += + static_cast(static_cast(*chunk).GetView(0).size()); + } + ASSERT_EQ(iterations * bufsize, total_data_size); +} + +TEST(TestChunkedStringBuilder, BasicOperation) { + const int chunksize = 100; + internal::ChunkedStringBuilder builder(chunksize); + + std::string value = "0123456789"; + + const int iterations = 100; + for (int i = 0; i < iterations; ++i) { + ASSERT_OK(builder.Append(value)); + } + + ArrayVector chunks; + ASSERT_OK(builder.Finish(&chunks)); + + ASSERT_EQ(10, chunks.size()); + + // Type is correct + for (auto chunk : chunks) { + ASSERT_TRUE(chunk->type()->Equals(*::arrow::utf8())); + } +} + } // namespace arrow diff --git a/cpp/src/arrow/array-dict-test.cc b/cpp/src/arrow/array-dict-test.cc index 4c8dcc067b8c5..5134d1fe927a8 100644 --- a/cpp/src/arrow/array-dict-test.cc +++ b/cpp/src/arrow/array-dict-test.cc @@ -15,29 +15,32 @@ // specific language governing 
permissions and limitations // under the License. -#include +#include #include -#include -#include #include +#include #include #include #include #include "arrow/array.h" -#include "arrow/buffer.h" #include "arrow/builder.h" +#include "arrow/memory_pool.h" #include "arrow/status.h" #include "arrow/test-common.h" #include "arrow/test-util.h" #include "arrow/type.h" +#include "arrow/util/checked_cast.h" +#include "arrow/util/decimal.h" namespace arrow { using std::string; using std::vector; +using internal::checked_cast; + // ---------------------------------------------------------------------- // Dictionary tests @@ -55,59 +58,40 @@ TYPED_TEST(TestDictionaryBuilder, Basic) { ASSERT_OK(builder.Append(static_cast(1))); ASSERT_OK(builder.Append(static_cast(2))); ASSERT_OK(builder.Append(static_cast(1))); + ASSERT_OK(builder.AppendNull()); + + ASSERT_EQ(builder.length(), 4); + ASSERT_EQ(builder.null_count(), 1); std::shared_ptr result; ASSERT_OK(builder.Finish(&result)); // Build expected data - NumericBuilder dict_builder; - ASSERT_OK(dict_builder.Append(static_cast(1))); - ASSERT_OK(dict_builder.Append(static_cast(2))); - std::shared_ptr dict_array; - ASSERT_OK(dict_builder.Finish(&dict_array)); - auto dtype = std::make_shared(int8(), dict_array); + auto dict_array = ArrayFromJSON(std::make_shared(), "[1, 2]"); + auto dict_type = std::make_shared(int8(), dict_array); - Int8Builder int_builder; - ASSERT_OK(int_builder.Append(0)); - ASSERT_OK(int_builder.Append(1)); - ASSERT_OK(int_builder.Append(0)); - std::shared_ptr int_array; - ASSERT_OK(int_builder.Finish(&int_array)); + auto int_array = ArrayFromJSON(int8(), "[0, 1, 0, null]"); + DictionaryArray expected(dict_type, int_array); - DictionaryArray expected(dtype, int_array); ASSERT_TRUE(expected.Equals(result)); } TYPED_TEST(TestDictionaryBuilder, ArrayConversion) { - NumericBuilder builder; - // DictionaryBuilder builder; - ASSERT_OK(builder.Append(static_cast(1))); - ASSERT_OK(builder.Append(static_cast(2))); - ASSERT_OK(builder.Append(static_cast(1))); + auto type = std::make_shared(); - std::shared_ptr intermediate_result; - ASSERT_OK(builder.Finish(&intermediate_result)); + auto intermediate_result = ArrayFromJSON(type, "[1, 2, 1]"); DictionaryBuilder dictionary_builder(default_memory_pool()); ASSERT_OK(dictionary_builder.AppendArray(*intermediate_result)); std::shared_ptr result; ASSERT_OK(dictionary_builder.Finish(&result)); // Build expected data - NumericBuilder dict_builder; - ASSERT_OK(dict_builder.Append(static_cast(1))); - ASSERT_OK(dict_builder.Append(static_cast(2))); - std::shared_ptr dict_array; - ASSERT_OK(dict_builder.Finish(&dict_array)); - auto dtype = std::make_shared(int8(), dict_array); + auto dict_array = ArrayFromJSON(type, "[1, 2]"); + auto dict_type = std::make_shared(int8(), dict_array); - Int8Builder int_builder; - ASSERT_OK(int_builder.Append(0)); - ASSERT_OK(int_builder.Append(1)); - ASSERT_OK(int_builder.Append(0)); - std::shared_ptr int_array; - ASSERT_OK(int_builder.Finish(&int_array)); + auto int_array = ArrayFromJSON(int8(), "[0, 1, 0]"); + DictionaryArray expected(dict_type, int_array); - DictionaryArray expected(dtype, int_array); ASSERT_TRUE(expected.Equals(result)); } @@ -150,120 +134,74 @@ TYPED_TEST(TestDictionaryBuilder, DoubleTableSize) { } TYPED_TEST(TestDictionaryBuilder, DeltaDictionary) { + using c_type = typename TypeParam::c_type; + auto type = std::make_shared(); + DictionaryBuilder builder(default_memory_pool()); - ASSERT_OK(builder.Append(static_cast(1))); - 
ASSERT_OK(builder.Append(static_cast(2))); - ASSERT_OK(builder.Append(static_cast(1))); - ASSERT_OK(builder.Append(static_cast(2))); + ASSERT_OK(builder.Append(static_cast(1))); + ASSERT_OK(builder.Append(static_cast(2))); + ASSERT_OK(builder.Append(static_cast(1))); + ASSERT_OK(builder.Append(static_cast(2))); std::shared_ptr result; FinishAndCheckPadding(&builder, &result); // Build expected data for the initial dictionary - NumericBuilder dict_builder1; - ASSERT_OK(dict_builder1.Append(static_cast(1))); - ASSERT_OK(dict_builder1.Append(static_cast(2))); - std::shared_ptr dict_array1; - ASSERT_OK(dict_builder1.Finish(&dict_array1)); - auto dtype1 = std::make_shared(int8(), dict_array1); + auto dict_type1 = dictionary(int8(), ArrayFromJSON(type, "[1, 2]")); + DictionaryArray expected(dict_type1, ArrayFromJSON(int8(), "[0, 1, 0, 1]")); - Int8Builder int_builder1; - ASSERT_OK(int_builder1.Append(0)); - ASSERT_OK(int_builder1.Append(1)); - ASSERT_OK(int_builder1.Append(0)); - ASSERT_OK(int_builder1.Append(1)); - std::shared_ptr int_array1; - ASSERT_OK(int_builder1.Finish(&int_array1)); - - DictionaryArray expected(dtype1, int_array1); ASSERT_TRUE(expected.Equals(result)); // extend the dictionary builder with new data - ASSERT_OK(builder.Append(static_cast(2))); - ASSERT_OK(builder.Append(static_cast(3))); - ASSERT_OK(builder.Append(static_cast(3))); - ASSERT_OK(builder.Append(static_cast(1))); - ASSERT_OK(builder.Append(static_cast(3))); + ASSERT_OK(builder.Append(static_cast(2))); + ASSERT_OK(builder.Append(static_cast(3))); + ASSERT_OK(builder.Append(static_cast(3))); + ASSERT_OK(builder.Append(static_cast(1))); + ASSERT_OK(builder.Append(static_cast(3))); std::shared_ptr result_delta; ASSERT_OK(builder.Finish(&result_delta)); // Build expected data for the delta dictionary - NumericBuilder dict_builder2; - ASSERT_OK(dict_builder2.Append(static_cast(3))); - std::shared_ptr dict_array2; - ASSERT_OK(dict_builder2.Finish(&dict_array2)); - auto dtype2 = std::make_shared(int8(), dict_array2); + auto dict_type2 = dictionary(int8(), ArrayFromJSON(type, "[3]")); + DictionaryArray expected_delta(dict_type2, ArrayFromJSON(int8(), "[1, 2, 2, 0, 2]")); - Int8Builder int_builder2; - ASSERT_OK(int_builder2.Append(1)); - ASSERT_OK(int_builder2.Append(2)); - ASSERT_OK(int_builder2.Append(2)); - ASSERT_OK(int_builder2.Append(0)); - ASSERT_OK(int_builder2.Append(2)); - std::shared_ptr int_array2; - ASSERT_OK(int_builder2.Finish(&int_array2)); - - DictionaryArray expected_delta(dtype2, int_array2); ASSERT_TRUE(expected_delta.Equals(result_delta)); } TYPED_TEST(TestDictionaryBuilder, DoubleDeltaDictionary) { + using c_type = typename TypeParam::c_type; + auto type = std::make_shared(); + DictionaryBuilder builder(default_memory_pool()); - ASSERT_OK(builder.Append(static_cast(1))); - ASSERT_OK(builder.Append(static_cast(2))); - ASSERT_OK(builder.Append(static_cast(1))); - ASSERT_OK(builder.Append(static_cast(2))); + ASSERT_OK(builder.Append(static_cast(1))); + ASSERT_OK(builder.Append(static_cast(2))); + ASSERT_OK(builder.Append(static_cast(1))); + ASSERT_OK(builder.Append(static_cast(2))); std::shared_ptr result; FinishAndCheckPadding(&builder, &result); // Build expected data for the initial dictionary - NumericBuilder dict_builder1; - ASSERT_OK(dict_builder1.Append(static_cast(1))); - ASSERT_OK(dict_builder1.Append(static_cast(2))); - std::shared_ptr dict_array1; - ASSERT_OK(dict_builder1.Finish(&dict_array1)); - auto dtype1 = std::make_shared(int8(), dict_array1); - - Int8Builder int_builder1; - 
ASSERT_OK(int_builder1.Append(0)); - ASSERT_OK(int_builder1.Append(1)); - ASSERT_OK(int_builder1.Append(0)); - ASSERT_OK(int_builder1.Append(1)); - std::shared_ptr int_array1; - ASSERT_OK(int_builder1.Finish(&int_array1)); + auto dict_type1 = dictionary(int8(), ArrayFromJSON(type, "[1, 2]")); + DictionaryArray expected(dict_type1, ArrayFromJSON(int8(), "[0, 1, 0, 1]")); - DictionaryArray expected(dtype1, int_array1); ASSERT_TRUE(expected.Equals(result)); // extend the dictionary builder with new data - ASSERT_OK(builder.Append(static_cast(2))); - ASSERT_OK(builder.Append(static_cast(3))); - ASSERT_OK(builder.Append(static_cast(3))); - ASSERT_OK(builder.Append(static_cast(1))); - ASSERT_OK(builder.Append(static_cast(3))); + ASSERT_OK(builder.Append(static_cast(2))); + ASSERT_OK(builder.Append(static_cast(3))); + ASSERT_OK(builder.Append(static_cast(3))); + ASSERT_OK(builder.Append(static_cast(1))); + ASSERT_OK(builder.Append(static_cast(3))); std::shared_ptr result_delta1; ASSERT_OK(builder.Finish(&result_delta1)); // Build expected data for the delta dictionary - NumericBuilder dict_builder2; - ASSERT_OK(dict_builder2.Append(static_cast(3))); - std::shared_ptr dict_array2; - ASSERT_OK(dict_builder2.Finish(&dict_array2)); - auto dtype2 = std::make_shared(int8(), dict_array2); + auto dict_type2 = dictionary(int8(), ArrayFromJSON(type, "[3]")); + DictionaryArray expected_delta1(dict_type2, ArrayFromJSON(int8(), "[1, 2, 2, 0, 2]")); - Int8Builder int_builder2; - ASSERT_OK(int_builder2.Append(1)); - ASSERT_OK(int_builder2.Append(2)); - ASSERT_OK(int_builder2.Append(2)); - ASSERT_OK(int_builder2.Append(0)); - ASSERT_OK(int_builder2.Append(2)); - std::shared_ptr int_array2; - ASSERT_OK(int_builder2.Finish(&int_array2)); - - DictionaryArray expected_delta1(dtype2, int_array2); ASSERT_TRUE(expected_delta1.Equals(result_delta1)); // extend the dictionary builder with new data again @@ -277,23 +215,9 @@ TYPED_TEST(TestDictionaryBuilder, DoubleDeltaDictionary) { ASSERT_OK(builder.Finish(&result_delta2)); // Build expected data for the delta dictionary again - NumericBuilder dict_builder3; - ASSERT_OK(dict_builder3.Append(static_cast(4))); - ASSERT_OK(dict_builder3.Append(static_cast(5))); - std::shared_ptr dict_array3; - ASSERT_OK(dict_builder3.Finish(&dict_array3)); - auto dtype3 = std::make_shared(int8(), dict_array3); - - Int8Builder int_builder3; - ASSERT_OK(int_builder3.Append(0)); - ASSERT_OK(int_builder3.Append(1)); - ASSERT_OK(int_builder3.Append(2)); - ASSERT_OK(int_builder3.Append(3)); - ASSERT_OK(int_builder3.Append(4)); - std::shared_ptr int_array3; - ASSERT_OK(int_builder3.Finish(&int_array3)); + auto dict_type3 = dictionary(int8(), ArrayFromJSON(type, "[4, 5]")); + DictionaryArray expected_delta2(dict_type3, ArrayFromJSON(int8(), "[0, 1, 2, 3, 4]")); - DictionaryArray expected_delta2(dtype3, int_array3); ASSERT_TRUE(expected_delta2.Equals(result_delta2)); } @@ -308,21 +232,27 @@ TEST(TestStringDictionaryBuilder, Basic) { ASSERT_OK(builder.Finish(&result)); // Build expected data - StringBuilder str_builder; - ASSERT_OK(str_builder.Append("test")); - ASSERT_OK(str_builder.Append("test2")); - std::shared_ptr str_array; - ASSERT_OK(str_builder.Finish(&str_array)); - auto dtype = std::make_shared(int8(), str_array); + auto dtype = dictionary(int8(), ArrayFromJSON(utf8(), "[\"test\", \"test2\"]")); + auto int_array = ArrayFromJSON(int8(), "[0, 1, 0]"); + DictionaryArray expected(dtype, int_array); - Int8Builder int_builder; - ASSERT_OK(int_builder.Append(0)); - 
ASSERT_OK(int_builder.Append(1)); - ASSERT_OK(int_builder.Append(0)); - std::shared_ptr int_array; - ASSERT_OK(int_builder.Finish(&int_array)); + ASSERT_TRUE(expected.Equals(result)); +} + +// ARROW-4367 +TEST(TestStringDictionaryBuilder, OnlyNull) { + // Build the dictionary Array + StringDictionaryBuilder builder(default_memory_pool()); + ASSERT_OK(builder.AppendNull()); + std::shared_ptr result; + ASSERT_OK(builder.Finish(&result)); + + // Build expected data + auto dtype = dictionary(int8(), ArrayFromJSON(utf8(), "[]")); + auto int_array = ArrayFromJSON(int8(), "[null]"); DictionaryArray expected(dtype, int_array); + ASSERT_TRUE(expected.Equals(result)); } @@ -373,21 +303,10 @@ TEST(TestStringDictionaryBuilder, DeltaDictionary) { ASSERT_OK(builder.Finish(&result)); // Build expected data - StringBuilder str_builder1; - ASSERT_OK(str_builder1.Append("test")); - ASSERT_OK(str_builder1.Append("test2")); - std::shared_ptr str_array1; - ASSERT_OK(str_builder1.Finish(&str_array1)); - auto dtype1 = std::make_shared(int8(), str_array1); - - Int8Builder int_builder1; - ASSERT_OK(int_builder1.Append(0)); - ASSERT_OK(int_builder1.Append(1)); - ASSERT_OK(int_builder1.Append(0)); - std::shared_ptr int_array1; - ASSERT_OK(int_builder1.Finish(&int_array1)); + auto dtype = dictionary(int8(), ArrayFromJSON(utf8(), "[\"test\", \"test2\"]")); + auto int_array = ArrayFromJSON(int8(), "[0, 1, 0]"); + DictionaryArray expected(dtype, int_array); - DictionaryArray expected(dtype1, int_array1); ASSERT_TRUE(expected.Equals(result)); // build a delta dictionary @@ -399,20 +318,10 @@ TEST(TestStringDictionaryBuilder, DeltaDictionary) { FinishAndCheckPadding(&builder, &result_delta); // Build expected data - StringBuilder str_builder2; - ASSERT_OK(str_builder2.Append("test3")); - std::shared_ptr str_array2; - ASSERT_OK(str_builder2.Finish(&str_array2)); - auto dtype2 = std::make_shared(int8(), str_array2); - - Int8Builder int_builder2; - ASSERT_OK(int_builder2.Append(1)); - ASSERT_OK(int_builder2.Append(2)); - ASSERT_OK(int_builder2.Append(1)); - std::shared_ptr int_array2; - ASSERT_OK(int_builder2.Finish(&int_array2)); - + auto dtype2 = dictionary(int8(), ArrayFromJSON(utf8(), "[\"test3\"]")); + auto int_array2 = ArrayFromJSON(int8(), "[1, 2, 1]"); DictionaryArray expected_delta(dtype2, int_array2); + ASSERT_TRUE(expected_delta.Equals(result_delta)); } @@ -647,7 +556,7 @@ TEST(TestFixedSizeBinaryDictionaryBuilder, InvalidTypeAppend) { TEST(TestDecimalDictionaryBuilder, Basic) { // Build the dictionary Array - const auto& decimal_type = arrow::decimal(2, 0); + auto decimal_type = arrow::decimal(2, 0); DictionaryBuilder builder(decimal_type, default_memory_pool()); // Test data @@ -660,20 +569,9 @@ TEST(TestDecimalDictionaryBuilder, Basic) { ASSERT_OK(builder.Finish(&result)); // Build expected data - FixedSizeBinaryBuilder decimal_builder(decimal_type); - ASSERT_OK(decimal_builder.Append(Decimal128(12).ToBytes())); - ASSERT_OK(decimal_builder.Append(Decimal128(11).ToBytes())); - - std::shared_ptr decimal_array; - ASSERT_OK(decimal_builder.Finish(&decimal_array)); - auto dtype = arrow::dictionary(int8(), decimal_array); + auto dtype = dictionary(int8(), ArrayFromJSON(decimal_type, "[\"12\", \"11\"]")); + DictionaryArray expected(dtype, ArrayFromJSON(int8(), "[0, 0, 1, 0]")); - Int8Builder int_builder; - ASSERT_OK(int_builder.AppendValues({0, 0, 1, 0})); - std::shared_ptr int_array; - ASSERT_OK(int_builder.Finish(&int_array)); - - DictionaryArray expected(dtype, int_array); ASSERT_TRUE(expected.Equals(result)); } @@ 
-758,26 +656,20 @@ TEST(TestDictionary, Basics) { TEST(TestDictionary, Equals) { vector is_valid = {true, true, false, true, true, true}; + std::shared_ptr dict, dict2, indices, indices2, indices3; - std::shared_ptr dict; - vector dict_values = {"foo", "bar", "baz"}; - ArrayFromVector(dict_values, &dict); + dict = ArrayFromJSON(utf8(), "[\"foo\", \"bar\", \"baz\"]"); std::shared_ptr dict_type = dictionary(int16(), dict); - std::shared_ptr dict2; - vector dict2_values = {"foo", "bar", "baz", "qux"}; - ArrayFromVector(dict2_values, &dict2); + dict2 = ArrayFromJSON(utf8(), "[\"foo\", \"bar\", \"baz\", \"qux\"]"); std::shared_ptr dict2_type = dictionary(int16(), dict2); - std::shared_ptr indices; vector indices_values = {1, 2, -1, 0, 2, 0}; ArrayFromVector(is_valid, indices_values, &indices); - std::shared_ptr indices2; vector indices2_values = {1, 2, 0, 0, 2, 0}; ArrayFromVector(is_valid, indices2_values, &indices2); - std::shared_ptr indices3; vector indices3_values = {1, 1, 0, 0, 2, 0}; ArrayFromVector(is_valid, indices3_values, &indices3); @@ -825,17 +717,10 @@ TEST(TestDictionary, Equals) { } TEST(TestDictionary, Validate) { - vector is_valid = {true, true, false, true, true, true}; - - std::shared_ptr dict; - vector dict_values = {"foo", "bar", "baz"}; - ArrayFromVector(dict_values, &dict); + auto dict = ArrayFromJSON(utf8(), "[\"foo\", \"bar\", \"baz\"]"); std::shared_ptr dict_type = dictionary(int16(), dict); - std::shared_ptr indices; - vector indices_values = {1, 2, 0, 0, 2, 0}; - ArrayFromVector(is_valid, indices_values, &indices); - + auto indices = ArrayFromJSON(int16(), "[1, 2, null, 0, 2, 0]"); std::shared_ptr arr = std::make_shared(dict_type, indices); // Only checking index type for now @@ -857,28 +742,20 @@ TEST(TestDictionary, Validate) { } TEST(TestDictionary, FromArray) { - std::shared_ptr dict; - vector dict_values = {"foo", "bar", "baz"}; - ArrayFromVector(dict_values, &dict); + auto dict = ArrayFromJSON(utf8(), "[\"foo\", \"bar\", \"baz\"]"); std::shared_ptr dict_type = dictionary(int16(), dict); - std::shared_ptr indices1; - vector indices_values1 = {1, 2, 0, 0, 2, 0}; - ArrayFromVector(indices_values1, &indices1); - - std::shared_ptr indices2; - vector indices_values2 = {1, 2, 0, 3, 2, 0}; - ArrayFromVector(indices_values2, &indices2); + auto indices1 = ArrayFromJSON(int16(), "[1, 2, 0, 0, 2, 0]"); + auto indices2 = ArrayFromJSON(int16(), "[1, 2, 0, 3, 2, 0]"); + // Invalid index is masked by null std::shared_ptr indices3; vector is_valid3 = {true, true, false, true, true, true}; vector indices_values3 = {1, 2, -1, 0, 2, 0}; ArrayFromVector(is_valid3, indices_values3, &indices3); - std::shared_ptr indices4; - vector is_valid4 = {true, true, false, true, true, true}; - vector indices_values4 = {1, 2, 1, 3, 2, 0}; - ArrayFromVector(is_valid4, indices_values4, &indices4); + // Index out of bounds + auto indices4 = ArrayFromJSON(int16(), "[1, 2, null, 3, 2, 0]"); std::shared_ptr arr1, arr2, arr3, arr4; ASSERT_OK(DictionaryArray::FromArrays(dict_type, indices1, &arr1)); @@ -887,4 +764,63 @@ TEST(TestDictionary, FromArray) { ASSERT_RAISES(Invalid, DictionaryArray::FromArrays(dict_type, indices4, &arr4)); } +TEST(TestDictionary, TransposeBasic) { + std::shared_ptr arr, out, expected; + + auto dict = ArrayFromJSON(utf8(), "[\"A\", \"B\", \"C\"]"); + auto dict_type = dictionary(int16(), dict); + auto indices = ArrayFromJSON(int16(), "[1, 2, 0, 0]"); + // ["B", "C", "A", "A"] + ASSERT_OK(DictionaryArray::FromArrays(dict_type, indices, &arr)); + + // Transpose to same index type 
+ { + auto out_dict = ArrayFromJSON(utf8(), "[\"Z\", \"A\", \"C\", \"B\"]"); + auto out_dict_type = dictionary(int16(), out_dict); + + const std::vector transpose_map{1, 3, 2}; + ASSERT_OK(internal::checked_cast(*arr).Transpose( + default_memory_pool(), out_dict_type, transpose_map, &out)); + + auto expected_indices = ArrayFromJSON(int16(), "[3, 2, 1, 1]"); + ASSERT_OK(DictionaryArray::FromArrays(out_dict_type, expected_indices, &expected)); + AssertArraysEqual(*out, *expected); + } + + // Transpose to other type + { + auto out_dict = ArrayFromJSON(utf8(), "[\"Z\", \"A\", \"C\", \"B\"]"); + auto out_dict_type = dictionary(int8(), out_dict); + + const std::vector transpose_map{1, 3, 2}; + ASSERT_OK(internal::checked_cast(*arr).Transpose( + default_memory_pool(), out_dict_type, transpose_map, &out)); + + auto expected_indices = ArrayFromJSON(int8(), "[3, 2, 1, 1]"); + ASSERT_OK(DictionaryArray::FromArrays(out_dict_type, expected_indices, &expected)); + AssertArraysEqual(*expected, *out); + } +} + +TEST(TestDictionary, TransposeNulls) { + std::shared_ptr arr, out, expected; + + auto dict = ArrayFromJSON(utf8(), "[\"A\", \"B\", \"C\"]"); + auto dict_type = dictionary(int16(), dict); + auto indices = ArrayFromJSON(int16(), "[1, 2, null, 0]"); + // ["B", "C", null, "A"] + ASSERT_OK(DictionaryArray::FromArrays(dict_type, indices, &arr)); + + auto out_dict = ArrayFromJSON(utf8(), "[\"Z\", \"A\", \"C\", \"B\"]"); + auto out_dict_type = dictionary(int16(), out_dict); + + const std::vector transpose_map{1, 3, 2}; + ASSERT_OK(internal::checked_cast(*arr).Transpose( + default_memory_pool(), out_dict_type, transpose_map, &out)); + + auto expected_indices = ArrayFromJSON(int16(), "[3, 2, null, 1]"); + ASSERT_OK(DictionaryArray::FromArrays(out_dict_type, expected_indices, &expected)); + AssertArraysEqual(*expected, *out); +} + } // namespace arrow diff --git a/cpp/src/arrow/array-list-test.cc b/cpp/src/arrow/array-list-test.cc index 207acd4cf65d7..c49c5e3097058 100644 --- a/cpp/src/arrow/array-list-test.cc +++ b/cpp/src/arrow/array-list-test.cc @@ -15,10 +15,8 @@ // specific language governing permissions and limitations // under the License. -#include #include #include -#include #include #include #include @@ -32,6 +30,8 @@ #include "arrow/test-common.h" #include "arrow/test-util.h" #include "arrow/type.h" +#include "arrow/util/bit-util.h" +#include "arrow/util/checked_cast.h" namespace arrow { diff --git a/cpp/src/arrow/array-struct-test.cc b/cpp/src/arrow/array-struct-test.cc index dc8bafd4c0071..68c35f57116a8 100644 --- a/cpp/src/arrow/array-struct-test.cc +++ b/cpp/src/arrow/array-struct-test.cc @@ -15,10 +15,8 @@ // specific language governing permissions and limitations // under the License. 
-#include #include #include -#include #include #include #include @@ -26,12 +24,12 @@ #include #include "arrow/array.h" -#include "arrow/buffer.h" #include "arrow/builder.h" #include "arrow/status.h" #include "arrow/test-common.h" #include "arrow/test-util.h" #include "arrow/type.h" +#include "arrow/util/checked_cast.h" namespace arrow { diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc index 586605831b3e5..ccdaad58c681a 100644 --- a/cpp/src/arrow/array-test.cc +++ b/cpp/src/arrow/array-test.cc @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -31,6 +30,7 @@ #include #include "arrow/array.h" +#include "arrow/buffer-builder.h" #include "arrow/buffer.h" #include "arrow/builder.h" #include "arrow/ipc/test-common.h" @@ -40,7 +40,6 @@ #include "arrow/test-common.h" #include "arrow/test-util.h" #include "arrow/type.h" -#include "arrow/type_traits.h" #include "arrow/util/bit-util.h" #include "arrow/util/checked_cast.h" #include "arrow/util/decimal.h" @@ -246,6 +245,23 @@ TEST_F(TestArray, BuildLargeInMemoryArray) { TEST_F(TestArray, TestCopy) {} +// ---------------------------------------------------------------------- +// Null type tests + +TEST(TestNullBuilder, Basics) { + NullBuilder builder; + std::shared_ptr array; + + ASSERT_OK(builder.AppendNull()); + ASSERT_OK(builder.Append(nullptr)); + ASSERT_OK(builder.AppendNull()); + ASSERT_OK(builder.Finish(&array)); + + const auto& null_array = checked_cast(*array); + ASSERT_EQ(null_array.length(), 3); + ASSERT_EQ(null_array.null_count(), 3); +} + // ---------------------------------------------------------------------- // Primitive type tests @@ -331,7 +347,10 @@ class TestPrimitiveBuilder : public TestBuilder { ASSERT_TRUE(result->Equals(*expected)); } - int64_t FlipValue(int64_t value) const { return ~value; } + void FlipValue(T* ptr) { + auto byteptr = reinterpret_cast(ptr); + *byteptr = static_cast(~*byteptr); + } protected: std::unique_ptr builder_; @@ -414,8 +433,8 @@ void TestPrimitiveBuilder::RandomData(int64_t N, double pct_null) { } template <> -int64_t TestPrimitiveBuilder::FlipValue(int64_t value) const { - return !value; +void TestPrimitiveBuilder::FlipValue(T* ptr) { + *ptr = !*ptr; } template <> @@ -559,8 +578,7 @@ TYPED_TEST(TestPrimitiveBuilder, Equality) { const int64_t first_valid_idx = std::distance(valid_bytes.begin(), first_valid); // This should be true with a very high probability, but might introduce flakiness ASSERT_LT(first_valid_idx, size - 1); - draws[first_valid_idx] = static_cast( - this->FlipValue(*reinterpret_cast(&draws[first_valid_idx]))); + this->FlipValue(&draws[first_valid_idx]); ASSERT_OK(MakeArray(valid_bytes, draws, size, builder, &unequal_array)); // test normal equality @@ -744,22 +762,22 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesLazyIter) { auto& draws = this->draws_; auto& valid_bytes = this->valid_bytes_; - auto doubler = [&draws](int64_t index) { return draws[index] * 2; }; - auto lazy_iter = internal::MakeLazyRange(doubler, size); + auto halve = [&draws](int64_t index) { return draws[index] / 2; }; + auto lazy_iter = internal::MakeLazyRange(halve, size); ASSERT_OK(this->builder_->AppendValues(lazy_iter.begin(), lazy_iter.end(), valid_bytes.begin())); - std::vector doubled; - transform(draws.begin(), draws.end(), back_inserter(doubled), - [](T in) { return in * 2; }); + std::vector halved; + transform(draws.begin(), draws.end(), back_inserter(halved), + [](T in) { return in / 2; }); std::shared_ptr result; 
FinishAndCheckPadding(this->builder_.get(), &result); std::shared_ptr expected; ASSERT_OK( - this->builder_->AppendValues(doubled.data(), doubled.size(), valid_bytes.data())); + this->builder_->AppendValues(halved.data(), halved.size(), valid_bytes.data())); FinishAndCheckPadding(this->builder_.get(), &expected); ASSERT_TRUE(expected->Equals(result)); diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc index 05d66d5cffdb2..1569889c0a6d0 100644 --- a/cpp/src/arrow/array.cc +++ b/cpp/src/arrow/array.cc @@ -18,6 +18,7 @@ #include "arrow/array.h" #include +#include #include #include #include @@ -32,6 +33,7 @@ #include "arrow/util/bit-util.h" #include "arrow/util/checked_cast.h" #include "arrow/util/decimal.h" +#include "arrow/util/int-util.h" #include "arrow/util/logging.h" #include "arrow/util/macros.h" #include "arrow/visitor.h" @@ -163,12 +165,6 @@ PrimitiveArray::PrimitiveArray(const std::shared_ptr& type, int64_t le SetData(ArrayData::Make(type, length, {null_bitmap, data}, null_count, offset)); } -template -NumericArray::NumericArray(const std::shared_ptr& data) - : PrimitiveArray(data) { - DCHECK_EQ(data->type->id(), T::type_id); -} - // ---------------------------------------------------------------------- // BooleanArray @@ -393,7 +389,7 @@ std::shared_ptr StructArray::field(int i) const { } std::shared_ptr StructArray::GetFieldByName(const std::string& name) const { - int i = struct_type()->GetChildIndex(name); + int i = struct_type()->GetFieldIndex(name); return i == -1 ? nullptr : field(i); } @@ -636,9 +632,8 @@ Status DictionaryArray::FromArrays(const std::shared_ptr& type, is_valid = ValidateDictionaryIndices(indices, upper_bound); break; default: - std::stringstream ss; - ss << "Categorical index type not supported: " << indices->type()->ToString(); - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("Categorical index type not supported: ", + indices->type()->ToString()); } if (!is_valid.ok()) { @@ -662,6 +657,66 @@ std::shared_ptr DictionaryArray::dictionary() const { return dict_type_->dictionary(); } +template +static Status TransposeDictIndices(MemoryPool* pool, const ArrayData& in_data, + const std::shared_ptr& type, + const std::vector& transpose_map, + std::shared_ptr* out) { + using in_c_type = typename InType::c_type; + using out_c_type = typename OutType::c_type; + + std::shared_ptr out_buffer; + RETURN_NOT_OK(AllocateBuffer(pool, in_data.length * sizeof(out_c_type), &out_buffer)); + // Null bitmap is unchanged + auto out_data = ArrayData::Make(type, in_data.length, {in_data.buffers[0], out_buffer}, + in_data.null_count); + internal::TransposeInts(in_data.GetValues(1), + out_data->GetMutableValues(1), in_data.length, + transpose_map.data()); + *out = MakeArray(out_data); + return Status::OK(); +} + +Status DictionaryArray::Transpose(MemoryPool* pool, const std::shared_ptr& type, + const std::vector& transpose_map, + std::shared_ptr* out) const { + DCHECK_EQ(type->id(), Type::DICTIONARY); + const auto& out_dict_type = checked_cast(*type); + + // XXX We'll probably want to make this operation a kernel when we + // implement dictionary-to-dictionary casting. 
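+  // For example, if the input dictionary is ["foo", "bar"] and the target
+  // dictionary is ["bar", "foo", "baz"], the transpose map is {1, 0}:
+  // input index 0 ("foo") becomes output index 1, and input index 1
+  // ("bar") becomes output index 0.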
+ auto in_type_id = dict_type_->index_type()->id(); + auto out_type_id = out_dict_type.index_type()->id(); + +#define TRANSPOSE_IN_OUT_CASE(IN_INDEX_TYPE, OUT_INDEX_TYPE) \ + case OUT_INDEX_TYPE::type_id: \ + return TransposeDictIndices(pool, *data(), type, \ + transpose_map, out); + +#define TRANSPOSE_IN_CASE(IN_INDEX_TYPE) \ + case IN_INDEX_TYPE::type_id: \ + switch (out_type_id) { \ + TRANSPOSE_IN_OUT_CASE(IN_INDEX_TYPE, Int8Type) \ + TRANSPOSE_IN_OUT_CASE(IN_INDEX_TYPE, Int16Type) \ + TRANSPOSE_IN_OUT_CASE(IN_INDEX_TYPE, Int32Type) \ + TRANSPOSE_IN_OUT_CASE(IN_INDEX_TYPE, Int64Type) \ + default: \ + return Status::NotImplemented("unexpected index type"); \ + } + + switch (in_type_id) { + TRANSPOSE_IN_CASE(Int8Type) + TRANSPOSE_IN_CASE(Int16Type) + TRANSPOSE_IN_CASE(Int32Type) + TRANSPOSE_IN_CASE(Int64Type) + default: + return Status::NotImplemented("unexpected index type"); + } + +#undef TRANSPOSE_IN_OUT_CASE +#undef TRANSPOSE_IN_CASE +} + // ---------------------------------------------------------------------- // Implement Array::Accept as inline visitor @@ -678,12 +733,11 @@ struct ValidateVisitor { Status Visit(const NullArray&) { return Status::OK(); } Status Visit(const PrimitiveArray& array) { - if (array.data()->buffers.size() != 2) { - return Status::Invalid("number of buffers was != 2"); - } - if (array.values() == nullptr) { - return Status::Invalid("values was null"); - } + ARROW_RETURN_IF(array.data()->buffers.size() != 2, + Status::Invalid("number of buffers was != 2")); + + ARROW_RETURN_IF(array.values() == nullptr, Status::Invalid("values was null")); + return Status::OK(); } @@ -714,10 +768,8 @@ struct ValidateVisitor { return Status::Invalid("value_offsets_ was null"); } if (value_offsets->size() / static_cast(sizeof(int32_t)) < array.length()) { - std::stringstream ss; - ss << "offset buffer size (bytes): " << value_offsets->size() - << " isn't large enough for length: " << array.length(); - return Status::Invalid(ss.str()); + return Status::Invalid("offset buffer size (bytes): ", value_offsets->size(), + " isn't large enough for length: ", array.length()); } if (!array.values()) { @@ -726,17 +778,13 @@ struct ValidateVisitor { const int32_t last_offset = array.value_offset(array.length()); if (array.values()->length() != last_offset) { - std::stringstream ss; - ss << "Final offset invariant not equal to values length: " << last_offset - << "!=" << array.values()->length(); - return Status::Invalid(ss.str()); + return Status::Invalid("Final offset invariant not equal to values length: ", + last_offset, "!=", array.values()->length()); } const Status child_valid = ValidateArray(*array.values()); if (!child_valid.ok()) { - std::stringstream ss; - ss << "Child array invalid: " << child_valid.ToString(); - return Status::Invalid(ss.str()); + return Status::Invalid("Child array invalid: ", child_valid.ToString()); } int32_t prev_offset = array.value_offset(0); @@ -746,18 +794,14 @@ struct ValidateVisitor { for (int64_t i = 1; i <= array.length(); ++i) { int32_t current_offset = array.value_offset(i); if (array.IsNull(i - 1) && current_offset != prev_offset) { - std::stringstream ss; - ss << "Offset invariant failure at: " << i - << " inconsistent value_offsets for null slot" << current_offset - << "!=" << prev_offset; - return Status::Invalid(ss.str()); + return Status::Invalid("Offset invariant failure at: ", i, + " inconsistent value_offsets for null slot", + current_offset, "!=", prev_offset); } if (current_offset < prev_offset) { - std::stringstream ss; - ss << 
"Offset invariant failure: " << i - << " inconsistent offset for non-null slot: " << current_offset << "<" - << prev_offset; - return Status::Invalid(ss.str()); + return Status::Invalid("Offset invariant failure: ", i, + " inconsistent offset for non-null slot: ", current_offset, + "<", prev_offset); } prev_offset = current_offset; } @@ -780,18 +824,14 @@ struct ValidateVisitor { for (int i = 0; i < array.num_fields(); ++i) { auto it = array.field(i); if (it->length() != array_length) { - std::stringstream ss; - ss << "Length is not equal from field " << it->type()->ToString() - << " at position {" << idx << "}"; - return Status::Invalid(ss.str()); + return Status::Invalid("Length is not equal from field ", + it->type()->ToString(), " at position [", idx, "]"); } const Status child_valid = ValidateArray(*it); if (!child_valid.ok()) { - std::stringstream ss; - ss << "Child array invalid: " << child_valid.ToString() << " at position {" - << idx << "}"; - return Status::Invalid(ss.str()); + return Status::Invalid("Child array invalid: ", child_valid.ToString(), + " at position [", idx, "}"); } ++idx; } @@ -941,24 +981,4 @@ std::vector RechunkArraysConsistently( } // namespace internal -// ---------------------------------------------------------------------- -// Instantiate templates - -template class ARROW_TEMPLATE_EXPORT NumericArray; -template class ARROW_TEMPLATE_EXPORT NumericArray; -template class ARROW_TEMPLATE_EXPORT NumericArray; -template class ARROW_TEMPLATE_EXPORT NumericArray; -template class ARROW_TEMPLATE_EXPORT NumericArray; -template class ARROW_TEMPLATE_EXPORT NumericArray; -template class ARROW_TEMPLATE_EXPORT NumericArray; -template class ARROW_TEMPLATE_EXPORT NumericArray; -template class ARROW_TEMPLATE_EXPORT NumericArray; -template class ARROW_TEMPLATE_EXPORT NumericArray; -template class ARROW_TEMPLATE_EXPORT NumericArray; -template class ARROW_TEMPLATE_EXPORT NumericArray; -template class ARROW_TEMPLATE_EXPORT NumericArray; -template class ARROW_TEMPLATE_EXPORT NumericArray; -template class ARROW_TEMPLATE_EXPORT NumericArray; -template class ARROW_TEMPLATE_EXPORT NumericArray; - } // namespace arrow diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h index 0274c15f74f61..5b4daa808c7e4 100644 --- a/cpp/src/arrow/array.h +++ b/cpp/src/arrow/array.h @@ -18,7 +18,6 @@ #ifndef ARROW_ARRAY_H #define ARROW_ARRAY_H -#include #include #include #include @@ -87,7 +86,7 @@ class Status; /// input array and replace them with newly-allocated data, changing the output /// data type as well. 
struct ARROW_EXPORT ArrayData { - ArrayData() : length(0) {} + ArrayData() : length(0), null_count(0), offset(0) {} ArrayData(const std::shared_ptr& type, int64_t length, int64_t null_count = kUnknownNullCount, int64_t offset = 0) @@ -170,24 +169,34 @@ struct ARROW_EXPORT ArrayData { // Access a buffer's data as a typed C pointer template - inline const T* GetValues(int i) const { + inline const T* GetValues(int i, int64_t absolute_offset) const { if (buffers[i]) { - return reinterpret_cast(buffers[i]->data()) + offset; + return reinterpret_cast(buffers[i]->data()) + absolute_offset; } else { return NULLPTR; } } + template + inline const T* GetValues(int i) const { + return GetValues(i, offset); + } + // Access a buffer's data as a typed C pointer template - inline T* GetMutableValues(int i) { + inline T* GetMutableValues(int i, int64_t absolute_offset) { if (buffers[i]) { - return reinterpret_cast(buffers[i]->mutable_data()) + offset; + return reinterpret_cast(buffers[i]->mutable_data()) + absolute_offset; } else { return NULLPTR; } } + template + inline T* GetMutableValues(int i) { + return GetMutableValues(i, offset); + } + std::shared_ptr type; int64_t length; int64_t null_count; @@ -301,7 +310,7 @@ class ARROW_EXPORT Array { std::string ToString() const; protected: - Array() {} + Array() : null_bitmap_data_(NULLPTR) {} std::shared_ptr data_; const uint8_t* null_bitmap_data_; @@ -372,7 +381,7 @@ class ARROW_EXPORT PrimitiveArray : public FlatArray { std::shared_ptr values() const { return data_->buffers[1]; } protected: - PrimitiveArray() {} + PrimitiveArray() : raw_values_(NULLPTR) {} inline void SetData(const std::shared_ptr& data) { auto values = data->buffers[1]; @@ -387,13 +396,14 @@ class ARROW_EXPORT PrimitiveArray : public FlatArray { const uint8_t* raw_values_; }; +/// Concrete Array class for numeric data. template -class ARROW_EXPORT NumericArray : public PrimitiveArray { +class NumericArray : public PrimitiveArray { public: using TypeClass = TYPE; using value_type = typename TypeClass::c_type; - explicit NumericArray(const std::shared_ptr& data); + explicit NumericArray(const std::shared_ptr& data) : PrimitiveArray(data) {} // Only enable this constructor without a type argument for types without additional // metadata @@ -412,6 +422,9 @@ class ARROW_EXPORT NumericArray : public PrimitiveArray { value_type Value(int64_t i) const { return raw_values()[i]; } + // For API compatibility with BinaryArray etc. + value_type GetView(int64_t i) const { return Value(i); } + protected: using PrimitiveArray::PrimitiveArray; }; @@ -432,6 +445,8 @@ class ARROW_EXPORT BooleanArray : public PrimitiveArray { i + data_->offset); } + bool GetView(int64_t i) const { return Value(i); } + protected: using PrimitiveArray::PrimitiveArray; }; @@ -554,7 +569,7 @@ class ARROW_EXPORT BinaryArray : public FlatArray { protected: // For subclasses - BinaryArray() {} + BinaryArray() : raw_value_offsets_(NULLPTR), raw_data_(NULLPTR) {} /// Protected method for constructors void SetData(const std::shared_ptr& data); @@ -792,7 +807,7 @@ class ARROW_EXPORT DictionaryArray : public Array { /// This function does the validation of the indices and input type. 
It checks if /// all indices are non-negative and smaller than the size of the dictionary /// - /// \param[in] type a data type containing a dictionary + /// \param[in] type a dictionary type /// \param[in] indices an array of non-negative signed /// integers smaller than the size of the dictionary /// \param[out] out the resulting DictionaryArray instance @@ -800,6 +815,23 @@ class ARROW_EXPORT DictionaryArray : public Array { const std::shared_ptr& indices, std::shared_ptr* out); + /// \brief Transpose this DictionaryArray + /// + /// This method constructs a new dictionary array with the given dictionary type, + /// transposing indices using the transpose map. + /// The type and the transpose map are typically computed using + /// DictionaryType::Unify. + /// + /// \param[in] pool a pool to allocate the array data from + /// \param[in] type a dictionary type + /// \param[in] transpose_map a vector transposing this array's indices + /// into the target array's indices + /// \param[out] out the resulting DictionaryArray instance + Status Transpose(MemoryPool* pool, const std::shared_ptr& type, + const std::vector& transpose_map, + std::shared_ptr* out) const; + // XXX Do we also want an unsafe in-place Transpose? + std::shared_ptr indices() const; std::shared_ptr dictionary() const; @@ -812,27 +844,6 @@ class ARROW_EXPORT DictionaryArray : public Array { std::shared_ptr indices_; }; -// ---------------------------------------------------------------------- -// extern templates and other details - -// Only instantiate these templates once -ARROW_EXTERN_TEMPLATE NumericArray; -ARROW_EXTERN_TEMPLATE NumericArray; -ARROW_EXTERN_TEMPLATE NumericArray; -ARROW_EXTERN_TEMPLATE NumericArray; -ARROW_EXTERN_TEMPLATE NumericArray; -ARROW_EXTERN_TEMPLATE NumericArray; -ARROW_EXTERN_TEMPLATE NumericArray; -ARROW_EXTERN_TEMPLATE NumericArray; -ARROW_EXTERN_TEMPLATE NumericArray; -ARROW_EXTERN_TEMPLATE NumericArray; -ARROW_EXTERN_TEMPLATE NumericArray; -ARROW_EXTERN_TEMPLATE NumericArray; -ARROW_EXTERN_TEMPLATE NumericArray; -ARROW_EXTERN_TEMPLATE NumericArray; -ARROW_EXTERN_TEMPLATE NumericArray; -ARROW_EXTERN_TEMPLATE NumericArray; - /// \brief Perform any validation checks to determine obvious inconsistencies /// with the array's internal data /// diff --git a/cpp/src/arrow/array/CMakeLists.txt b/cpp/src/arrow/array/CMakeLists.txt new file mode 100644 index 0000000000000..4a8ce3490abd1 --- /dev/null +++ b/cpp/src/arrow/array/CMakeLists.txt @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
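As a usage sketch of the `Transpose` API documented above: the unified type and transpose map would normally come from `DictionaryType::Unify`, as the doc comment notes. This is a minimal sketch, assuming the `int32_t` element type of `transpose_map` (the template arguments were stripped in transit) and names from this diff only:

```cpp
#include <memory>
#include <vector>

#include "arrow/array.h"
#include "arrow/memory_pool.h"
#include "arrow/status.h"

arrow::Status TransposeToUnified(const arrow::DictionaryArray& arr,
                                 const std::shared_ptr<arrow::DataType>& unified_type,
                                 const std::vector<int32_t>& transpose_map,
                                 std::shared_ptr<arrow::Array>* out) {
  // transpose_map[i] is the index in the unified dictionary corresponding
  // to index i in arr's own dictionary.
  return arr.Transpose(arrow::default_memory_pool(), unified_type, transpose_map, out);
}
```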
+
+# Headers: top level
+ARROW_INSTALL_ALL_HEADERS("arrow/array")
diff --git a/cpp/doc/Parquet.md b/cpp/src/arrow/array/README.md
similarity index 62%
rename from cpp/doc/Parquet.md
rename to cpp/src/arrow/array/README.md
index 0ed100731ca1a..09580193aad28 100644
--- a/cpp/doc/Parquet.md
+++ b/cpp/src/arrow/array/README.md
@@ -17,21 +17,4 @@
 under the License.
 -->
 
-## Building Arrow-Parquet integration
-
-To use Arrow C++ with Parquet, you must first build the Arrow C++ libraries and
-install them someplace. Then, you can build [parquet-cpp][1] with the Arrow
-adapter library:
-
-```bash
-# Set this to your preferred install location
-export ARROW_HOME=$HOME/local
-
-git clone https://github.com/apache/parquet-cpp.git
-cd parquet-cpp
-cmake -DCMAKE_INSTALL_PREFIX=$PARQUET_HOME
-make -j4
-make install
-```
-
-[1]: https://github.com/apache/parquet-cpp
+## Implementation details related to columnar (array) data structures
diff --git a/cpp/src/arrow/array/builder_adaptive.cc b/cpp/src/arrow/array/builder_adaptive.cc
new file mode 100644
index 0000000000000..e96c9a2400833
--- /dev/null
+++ b/cpp/src/arrow/array/builder_adaptive.cc
@@ -0,0 +1,409 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
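For orientation before the implementation below, a minimal sketch of how the adaptive builder is meant to be driven. Names come from the header later in this diff; the int16 outcome is an assumption based on `FinishInternal`'s width selection:

```cpp
#include <memory>

#include "arrow/array/builder_adaptive.h"
#include "arrow/memory_pool.h"
#include "arrow/status.h"

arrow::Status BuildAdaptive(std::shared_ptr<arrow::Array>* out) {
  arrow::AdaptiveIntBuilder builder(arrow::default_memory_pool());
  ARROW_RETURN_NOT_OK(builder.Append(1));    // fits in int8
  ARROW_RETURN_NOT_OK(builder.Append(300));  // forces a widening to int16
  ARROW_RETURN_NOT_OK(builder.AppendNull());
  // FinishInternal picks int8/int16/int32/int64 from the final int_size_,
  // so the resulting array should have type int16 here.
  return builder.Finish(out);
}
```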
+
+#include "arrow/array/builder_adaptive.h"
+
+#include
+#include
+#include
+#include
+
+#include "arrow/array.h"
+#include "arrow/buffer.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit-util.h"
+#include "arrow/util/int-util.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+using internal::AdaptiveIntBuilderBase;
+
+AdaptiveIntBuilderBase::AdaptiveIntBuilderBase(MemoryPool* pool)
+    : ArrayBuilder(int64(), pool),
+      data_(nullptr),
+      raw_data_(nullptr),
+      int_size_(1),
+      pending_pos_(0),
+      pending_has_nulls_(false) {}
+
+void AdaptiveIntBuilderBase::Reset() {
+  ArrayBuilder::Reset();
+  data_.reset();
+  raw_data_ = nullptr;
+  pending_pos_ = 0;
+  pending_has_nulls_ = false;
+}
+
+Status AdaptiveIntBuilderBase::Resize(int64_t capacity) {
+  RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
+  capacity = std::max(capacity, kMinBuilderCapacity);
+
+  int64_t nbytes = capacity * int_size_;
+  if (capacity_ == 0) {
+    RETURN_NOT_OK(AllocateResizableBuffer(pool_, nbytes, &data_));
+  } else {
+    RETURN_NOT_OK(data_->Resize(nbytes));
+  }
+  raw_data_ = reinterpret_cast<uint8_t*>(data_->mutable_data());
+
+  return ArrayBuilder::Resize(capacity);
+}
+
+AdaptiveIntBuilder::AdaptiveIntBuilder(MemoryPool* pool) : AdaptiveIntBuilderBase(pool) {}
+
+Status AdaptiveIntBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
+  RETURN_NOT_OK(CommitPendingData());
+
+  std::shared_ptr<DataType> output_type;
+  switch (int_size_) {
+    case 1:
+      output_type = int8();
+      break;
+    case 2:
+      output_type = int16();
+      break;
+    case 4:
+      output_type = int32();
+      break;
+    case 8:
+      output_type = int64();
+      break;
+    default:
+      DCHECK(false);
+      return Status::NotImplemented("Only ints of size 1,2,4,8 are supported");
+  }
+
+  std::shared_ptr<Buffer> null_bitmap;
+  RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
+  RETURN_NOT_OK(TrimBuffer(length_ * int_size_, data_.get()));
+
+  *out = ArrayData::Make(output_type, length_, {null_bitmap, data_}, null_count_);
+
+  data_ = nullptr;
+  capacity_ = length_ = null_count_ = 0;
+  return Status::OK();
+}
+
+Status AdaptiveIntBuilder::CommitPendingData() {
+  if (pending_pos_ == 0) {
+    return Status::OK();
+  }
+  RETURN_NOT_OK(Reserve(pending_pos_));
+  const uint8_t* valid_bytes = pending_has_nulls_ ? pending_valid_ : nullptr;
+  RETURN_NOT_OK(AppendValuesInternal(reinterpret_cast<const int64_t*>(pending_data_),
+                                     pending_pos_, valid_bytes));
+  pending_has_nulls_ = false;
+  pending_pos_ = 0;
+  return Status::OK();
+}
+
+static constexpr int64_t kAdaptiveIntChunkSize = 8192;
+
+Status AdaptiveIntBuilder::AppendValuesInternal(const int64_t* values, int64_t length,
+                                                const uint8_t* valid_bytes) {
+  while (length > 0) {
+    // In case `length` is very large, we don't want to trash the cache by
+    // scanning it twice (first to detect int width, second to copy the data).
+    // Instead, process data in L2-cacheable chunks.
+    const int64_t chunk_size = std::min(length, kAdaptiveIntChunkSize);
+
+    uint8_t new_int_size;
+    new_int_size = internal::DetectIntWidth(values, valid_bytes, chunk_size, int_size_);
+
+    DCHECK_GE(new_int_size, int_size_);
+    if (new_int_size > int_size_) {
+      // This updates int_size_
+      RETURN_NOT_OK(ExpandIntSize(new_int_size));
+    }
+
+    switch (int_size_) {
+      case 1:
+        internal::DowncastInts(values, reinterpret_cast<int8_t*>(raw_data_) + length_,
+                               chunk_size);
+        break;
+      case 2:
+        internal::DowncastInts(values, reinterpret_cast<int16_t*>(raw_data_) + length_,
+                               chunk_size);
+        break;
+      case 4:
+        internal::DowncastInts(values, reinterpret_cast<int32_t*>(raw_data_) + length_,
+                               chunk_size);
+        break;
+      case 8:
+        internal::DowncastInts(values, reinterpret_cast<int64_t*>(raw_data_) + length_,
+                               chunk_size);
+        break;
+      default:
+        DCHECK(false);
+    }
+
+    // This updates length_
+    ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, chunk_size);
+    values += chunk_size;
+    if (valid_bytes != nullptr) {
+      valid_bytes += chunk_size;
+    }
+    length -= chunk_size;
+  }
+
+  return Status::OK();
+}
+
+Status AdaptiveUIntBuilder::CommitPendingData() {
+  if (pending_pos_ == 0) {
+    return Status::OK();
+  }
+  RETURN_NOT_OK(Reserve(pending_pos_));
+  const uint8_t* valid_bytes = pending_has_nulls_ ? pending_valid_ : nullptr;
+  RETURN_NOT_OK(AppendValuesInternal(pending_data_, pending_pos_, valid_bytes));
+  pending_has_nulls_ = false;
+  pending_pos_ = 0;
+  return Status::OK();
+}
+
+Status AdaptiveIntBuilder::AppendValues(const int64_t* values, int64_t length,
+                                        const uint8_t* valid_bytes) {
+  RETURN_NOT_OK(CommitPendingData());
+  RETURN_NOT_OK(Reserve(length));
+
+  return AppendValuesInternal(values, length, valid_bytes);
+}
+
+template <typename new_type, typename old_type>
+typename std::enable_if<sizeof(old_type) >= sizeof(new_type), Status>::type
+AdaptiveIntBuilder::ExpandIntSizeInternal() {
+  return Status::OK();
+}
+
+#define __LESS(a, b) (a) < (b)
+template <typename new_type, typename old_type>
+typename std::enable_if<__LESS(sizeof(old_type), sizeof(new_type)), Status>::type
+AdaptiveIntBuilder::ExpandIntSizeInternal() {
+  int_size_ = sizeof(new_type);
+  RETURN_NOT_OK(Resize(data_->size() / sizeof(old_type)));
+  raw_data_ = reinterpret_cast<uint8_t*>(data_->mutable_data());
+  const old_type* src = reinterpret_cast<old_type*>(raw_data_);
+  new_type* dst = reinterpret_cast<new_type*>(raw_data_);
+
+  // By doing the backward copy, we ensure that no element is overridden during
+  // the copy process and the copy stays in-place.
+  std::copy_backward(src, src + length_, dst + length_);
+
+  return Status::OK();
+}
+#undef __LESS
+
+template <typename new_type>
+Status AdaptiveIntBuilder::ExpandIntSizeN() {
+  switch (int_size_) {
+    case 1:
+      RETURN_NOT_OK((ExpandIntSizeInternal<new_type, int8_t>()));
+      break;
+    case 2:
+      RETURN_NOT_OK((ExpandIntSizeInternal<new_type, int16_t>()));
+      break;
+    case 4:
+      RETURN_NOT_OK((ExpandIntSizeInternal<new_type, int32_t>()));
+      break;
+    case 8:
+      RETURN_NOT_OK((ExpandIntSizeInternal<new_type, int64_t>()));
+      break;
+    default:
+      DCHECK(false);
+  }
+  return Status::OK();
+}
+
+Status AdaptiveIntBuilder::ExpandIntSize(uint8_t new_int_size) {
+  switch (new_int_size) {
+    case 1:
+      RETURN_NOT_OK((ExpandIntSizeN<int8_t>()));
+      break;
+    case 2:
+      RETURN_NOT_OK((ExpandIntSizeN<int16_t>()));
+      break;
+    case 4:
+      RETURN_NOT_OK((ExpandIntSizeN<int32_t>()));
+      break;
+    case 8:
+      RETURN_NOT_OK((ExpandIntSizeN<int64_t>()));
+      break;
+    default:
+      DCHECK(false);
+  }
+  return Status::OK();
+}
+
+AdaptiveUIntBuilder::AdaptiveUIntBuilder(MemoryPool* pool)
+    : AdaptiveIntBuilderBase(pool) {}
+
+Status AdaptiveUIntBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
+  RETURN_NOT_OK(CommitPendingData());
+
+  std::shared_ptr<DataType> output_type;
+  switch (int_size_) {
+    case 1:
+      output_type = uint8();
+      break;
+    case 2:
+      output_type = uint16();
+      break;
+    case 4:
+      output_type = uint32();
+      break;
+    case 8:
+      output_type = uint64();
+      break;
+    default:
+      DCHECK(false);
+      return Status::NotImplemented("Only ints of size 1,2,4,8 are supported");
+  }
+
+  std::shared_ptr<Buffer> null_bitmap;
+  RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
+  RETURN_NOT_OK(TrimBuffer(length_ * int_size_, data_.get()));
+
+  *out = ArrayData::Make(output_type, length_, {null_bitmap, data_}, null_count_);
+
+  data_ = nullptr;
+  capacity_ = length_ = null_count_ = 0;
+  return Status::OK();
+}
+
+Status AdaptiveUIntBuilder::AppendValuesInternal(const uint64_t* values, int64_t length,
+                                                 const uint8_t* valid_bytes) {
+  while (length > 0) {
+    // See AdaptiveIntBuilder::AppendValuesInternal
+    const int64_t chunk_size = std::min(length, kAdaptiveIntChunkSize);
+
+    uint8_t new_int_size;
+    new_int_size = internal::DetectUIntWidth(values, valid_bytes, chunk_size, int_size_);
+
+    DCHECK_GE(new_int_size, int_size_);
+    if (new_int_size > int_size_) {
+      // This updates int_size_
+      RETURN_NOT_OK(ExpandIntSize(new_int_size));
+    }
+
+    switch (int_size_) {
+      case 1:
+        internal::DowncastUInts(values, reinterpret_cast<uint8_t*>(raw_data_) + length_,
+                                chunk_size);
+        break;
+      case 2:
+        internal::DowncastUInts(values, reinterpret_cast<uint16_t*>(raw_data_) + length_,
+                                chunk_size);
+        break;
+      case 4:
+        internal::DowncastUInts(values, reinterpret_cast<uint32_t*>(raw_data_) + length_,
+                                chunk_size);
+        break;
+      case 8:
+        internal::DowncastUInts(values, reinterpret_cast<uint64_t*>(raw_data_) + length_,
+                                chunk_size);
+        break;
+      default:
+        DCHECK(false);
+    }
+
+    // This updates length_
+    ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, chunk_size);
+    values += chunk_size;
+    if (valid_bytes != nullptr) {
+      valid_bytes += chunk_size;
+    }
+    length -= chunk_size;
+  }
+
+  return Status::OK();
+}
+
+Status AdaptiveUIntBuilder::AppendValues(const uint64_t* values, int64_t length,
+                                         const uint8_t* valid_bytes) {
+  RETURN_NOT_OK(Reserve(length));
+
+  return AppendValuesInternal(values, length, valid_bytes);
+}
+
+template <typename new_type, typename old_type>
+typename std::enable_if<sizeof(old_type) >= sizeof(new_type), Status>::type
+AdaptiveUIntBuilder::ExpandIntSizeInternal() {
+  return Status::OK();
+}
+
+#define __LESS(a, b) (a) < (b)
+template <typename new_type, typename old_type>
+typename std::enable_if<__LESS(sizeof(old_type), sizeof(new_type)), Status>::type
+AdaptiveUIntBuilder::ExpandIntSizeInternal() {
+  int_size_ = sizeof(new_type);
+  RETURN_NOT_OK(Resize(data_->size() / sizeof(old_type)));
+
+  old_type* src = reinterpret_cast<old_type*>(raw_data_);
+  new_type* dst = reinterpret_cast<new_type*>(raw_data_);
+  // By doing the backward copy, we ensure that no element is overridden during
+  // the copy process and the copy stays in-place.
+  std::copy_backward(src, src + length_, dst + length_);
+
+  return Status::OK();
+}
+#undef __LESS
+
+template <typename new_type>
+Status AdaptiveUIntBuilder::ExpandIntSizeN() {
+  switch (int_size_) {
+    case 1:
+      RETURN_NOT_OK((ExpandIntSizeInternal<new_type, uint8_t>()));
+      break;
+    case 2:
+      RETURN_NOT_OK((ExpandIntSizeInternal<new_type, uint16_t>()));
+      break;
+    case 4:
+      RETURN_NOT_OK((ExpandIntSizeInternal<new_type, uint32_t>()));
+      break;
+    case 8:
+      RETURN_NOT_OK((ExpandIntSizeInternal<new_type, uint64_t>()));
+      break;
+    default:
+      DCHECK(false);
+  }
+  return Status::OK();
+}
+
+Status AdaptiveUIntBuilder::ExpandIntSize(uint8_t new_int_size) {
+  switch (new_int_size) {
+    case 1:
+      RETURN_NOT_OK((ExpandIntSizeN<uint8_t>()));
+      break;
+    case 2:
+      RETURN_NOT_OK((ExpandIntSizeN<uint16_t>()));
+      break;
+    case 4:
+      RETURN_NOT_OK((ExpandIntSizeN<uint32_t>()));
+      break;
+    case 8:
+      RETURN_NOT_OK((ExpandIntSizeN<uint64_t>()));
+      break;
+    default:
+      DCHECK(false);
  }
+  return Status::OK();
+}
+
+} // namespace arrow
diff --git a/cpp/src/arrow/array/builder_adaptive.h b/cpp/src/arrow/array/builder_adaptive.h
new file mode 100644
index 0000000000000..6523de41622e4
--- /dev/null
+++ b/cpp/src/arrow/array/builder_adaptive.h
@@ -0,0 +1,174 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
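The `ExpandIntSizeInternal` overloads above widen the value buffer in place over the same allocation. A standalone sketch (plain C++, independent of Arrow) of why the copy must run backward:

```cpp
#include <algorithm>
#include <cstdint>

// Widen n int8 values to int16 over the same allocation (buf must hold at
// least 2 * n bytes). Writing from the end means dst[i] (bytes 2i..2i+1)
// is written only after src[i] (byte i) has been read, so no unread
// narrow element is clobbered; a forward copy would overwrite src[1]
// while writing dst[0].
void WidenInPlace(uint8_t* buf, int64_t n) {
  const int8_t* src = reinterpret_cast<const int8_t*>(buf);
  int16_t* dst = reinterpret_cast<int16_t*>(buf);
  std::copy_backward(src, src + n, dst + n);  // writes dst[n-1] first
}
```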
+ +#pragma once + +#include + +#include "arrow/array/builder_base.h" + +namespace arrow { + +namespace internal { + +class ARROW_EXPORT AdaptiveIntBuilderBase : public ArrayBuilder { + public: + explicit AdaptiveIntBuilderBase(MemoryPool* pool); + + /// Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory + Status AppendNulls(const uint8_t* valid_bytes, int64_t length) { + ARROW_RETURN_NOT_OK(CommitPendingData()); + ARROW_RETURN_NOT_OK(Reserve(length)); + memset(data_->mutable_data() + length_ * int_size_, 0, int_size_ * length); + UnsafeAppendToBitmap(valid_bytes, length); + return Status::OK(); + } + + Status AppendNull() { + pending_data_[pending_pos_] = 0; + pending_valid_[pending_pos_] = 0; + pending_has_nulls_ = true; + ++pending_pos_; + + if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) { + return CommitPendingData(); + } + return Status::OK(); + } + + void Reset() override; + Status Resize(int64_t capacity) override; + + protected: + virtual Status CommitPendingData() = 0; + + std::shared_ptr data_; + uint8_t* raw_data_; + uint8_t int_size_; + + static constexpr int32_t pending_size_ = 1024; + uint8_t pending_valid_[pending_size_]; + uint64_t pending_data_[pending_size_]; + int32_t pending_pos_; + bool pending_has_nulls_; +}; + +} // namespace internal + +class ARROW_EXPORT AdaptiveUIntBuilder : public internal::AdaptiveIntBuilderBase { + public: + explicit AdaptiveUIntBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT); + + using ArrayBuilder::Advance; + using internal::AdaptiveIntBuilderBase::Reset; + + /// Scalar append + Status Append(const uint64_t val) { + pending_data_[pending_pos_] = val; + pending_valid_[pending_pos_] = 1; + ++pending_pos_; + + if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) { + return CommitPendingData(); + } + return Status::OK(); + } + + /// \brief Append a sequence of elements in one shot + /// \param[in] values a contiguous C array of values + /// \param[in] length the number of values to append + /// \param[in] valid_bytes an optional sequence of bytes where non-zero + /// indicates a valid (non-null) value + /// \return Status + Status AppendValues(const uint64_t* values, int64_t length, + const uint8_t* valid_bytes = NULLPTR); + + Status FinishInternal(std::shared_ptr* out) override; + + protected: + Status CommitPendingData() override; + Status ExpandIntSize(uint8_t new_int_size); + + Status AppendValuesInternal(const uint64_t* values, int64_t length, + const uint8_t* valid_bytes); + + template + typename std::enable_if= sizeof(new_type), Status>::type + ExpandIntSizeInternal(); +#define __LESS(a, b) (a) < (b) + template + typename std::enable_if<__LESS(sizeof(old_type), sizeof(new_type)), Status>::type + ExpandIntSizeInternal(); +#undef __LESS + + template + Status ExpandIntSizeN(); +}; + +class ARROW_EXPORT AdaptiveIntBuilder : public internal::AdaptiveIntBuilderBase { + public: + explicit AdaptiveIntBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT); + + using ArrayBuilder::Advance; + using internal::AdaptiveIntBuilderBase::Reset; + + /// Scalar append + Status Append(const int64_t val) { + auto v = static_cast(val); + + pending_data_[pending_pos_] = v; + pending_valid_[pending_pos_] = 1; + ++pending_pos_; + + if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) { + return CommitPendingData(); + } + return Status::OK(); + } + + /// \brief Append a sequence of elements in one shot + /// \param[in] values a contiguous C array of values + /// \param[in] length the number of values to append + /// \param[in] 
valid_bytes an optional sequence of bytes where non-zero + /// indicates a valid (non-null) value + /// \return Status + Status AppendValues(const int64_t* values, int64_t length, + const uint8_t* valid_bytes = NULLPTR); + + Status FinishInternal(std::shared_ptr* out) override; + + protected: + Status CommitPendingData() override; + Status ExpandIntSize(uint8_t new_int_size); + + Status AppendValuesInternal(const int64_t* values, int64_t length, + const uint8_t* valid_bytes); + + template + typename std::enable_if= sizeof(new_type), Status>::type + ExpandIntSizeInternal(); +#define __LESS(a, b) (a) < (b) + template + typename std::enable_if<__LESS(sizeof(old_type), sizeof(new_type)), Status>::type + ExpandIntSizeInternal(); +#undef __LESS + + template + Status ExpandIntSizeN(); +}; + +} // namespace arrow diff --git a/cpp/src/arrow/array/builder_base.cc b/cpp/src/arrow/array/builder_base.cc new file mode 100644 index 0000000000000..e8059007c34ce --- /dev/null +++ b/cpp/src/arrow/array/builder_base.cc @@ -0,0 +1,109 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
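A sketch of the `valid_bytes` convention shared by the `AppendValues` overloads above, where a zero byte marks a null slot; the signatures are taken from this header, and the resulting int8 type is an assumption based on the width detection:

```cpp
#include <memory>

#include "arrow/array/builder_adaptive.h"
#include "arrow/memory_pool.h"
#include "arrow/status.h"

arrow::Status BuildWithNulls(std::shared_ptr<arrow::Array>* out) {
  arrow::AdaptiveIntBuilder builder(arrow::default_memory_pool());
  const int64_t values[] = {1, 0, 3};
  const uint8_t valid_bytes[] = {1, 0, 1};  // the middle slot is null
  ARROW_RETURN_NOT_OK(builder.AppendValues(values, 3, valid_bytes));
  return builder.Finish(out);  // yields [1, null, 3] as an int8 array
}
```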
+ +#include "arrow/array/builder_base.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "arrow/array.h" +#include "arrow/buffer.h" +#include "arrow/status.h" +#include "arrow/type.h" +#include "arrow/type_traits.h" +#include "arrow/util/bit-util.h" +#include "arrow/util/int-util.h" +#include "arrow/util/logging.h" + +namespace arrow { + +Status ArrayBuilder::TrimBuffer(const int64_t bytes_filled, ResizableBuffer* buffer) { + if (buffer) { + if (bytes_filled < buffer->size()) { + // Trim buffer + RETURN_NOT_OK(buffer->Resize(bytes_filled)); + } + // zero the padding + buffer->ZeroPadding(); + } else { + // Null buffers are allowed in place of 0-byte buffers + DCHECK_EQ(bytes_filled, 0); + } + return Status::OK(); +} + +Status ArrayBuilder::AppendToBitmap(bool is_valid) { + RETURN_NOT_OK(Reserve(1)); + UnsafeAppendToBitmap(is_valid); + return Status::OK(); +} + +Status ArrayBuilder::AppendToBitmap(const uint8_t* valid_bytes, int64_t length) { + RETURN_NOT_OK(Reserve(length)); + UnsafeAppendToBitmap(valid_bytes, length); + return Status::OK(); +} + +Status ArrayBuilder::Resize(int64_t capacity) { + RETURN_NOT_OK(CheckCapacity(capacity, capacity_)); + capacity_ = capacity; + return null_bitmap_builder_.Resize(capacity); +} + +Status ArrayBuilder::Advance(int64_t elements) { + if (length_ + elements > capacity_) { + return Status::Invalid("Builder must be expanded"); + } + length_ += elements; + return null_bitmap_builder_.Advance(elements); +} + +Status ArrayBuilder::Finish(std::shared_ptr* out) { + std::shared_ptr internal_data; + RETURN_NOT_OK(FinishInternal(&internal_data)); + *out = MakeArray(internal_data); + return Status::OK(); +} + +void ArrayBuilder::Reset() { + capacity_ = length_ = null_count_ = 0; + null_bitmap_builder_.Reset(); +} + +Status ArrayBuilder::SetNotNull(int64_t length) { + RETURN_NOT_OK(Reserve(length)); + UnsafeSetNotNull(length); + return Status::OK(); +} + +void ArrayBuilder::UnsafeAppendToBitmap(const std::vector& is_valid) { + for (bool element_valid : is_valid) { + UnsafeAppendToBitmap(element_valid); + } +} + +void ArrayBuilder::UnsafeSetNotNull(int64_t length) { + length_ += length; + null_bitmap_builder_.UnsafeAppend(length, true); +} + +} // namespace arrow diff --git a/cpp/src/arrow/array/builder_base.h b/cpp/src/arrow/array/builder_base.h new file mode 100644 index 0000000000000..f4655fab0dea5 --- /dev/null +++ b/cpp/src/arrow/array/builder_base.h @@ -0,0 +1,194 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
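Before the base-class header, a sketch of the Resize/Reserve contract it documents: `Resize` takes a total capacity, while `Reserve` takes an additional count on top of the current length. `Int64Builder` is an existing Arrow builder assumed here purely for illustration:

```cpp
#include "arrow/builder.h"
#include "arrow/status.h"

arrow::Status ReserveExample() {
  arrow::Int64Builder builder;
  // Total capacity: room for at least 1000 values overall.
  ARROW_RETURN_NOT_OK(builder.Resize(1000));
  ARROW_RETURN_NOT_OK(builder.Append(42));
  // Additional capacity: room for 500 more values beyond length().
  ARROW_RETURN_NOT_OK(builder.Reserve(500));
  return arrow::Status::OK();
}
```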
+
+#pragma once
+
+#include   // IWYU pragma: keep
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "arrow/buffer-builder.h"
+#include "arrow/memory_pool.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit-util.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/type_traits.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Array;
+struct ArrayData;
+
+constexpr int64_t kMinBuilderCapacity = 1 << 5;
+constexpr int64_t kListMaximumElements = std::numeric_limits<int32_t>::max() - 1;
+
+/// Base class for all data array builders.
+///
+/// This class provides facilities for incrementally building the null bitmap
+/// (see the Append methods) and, as a side effect, tracking the current number
+/// of slots and the null count.
+///
+/// \note Users are expected to use builders as one of the concrete types below.
+/// For example, ArrayBuilder* pointing to BinaryBuilder should be downcast before use.
+class ARROW_EXPORT ArrayBuilder {
+ public:
+  explicit ArrayBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
+      : type_(type), pool_(pool), null_bitmap_builder_(pool) {}
+
+  virtual ~ArrayBuilder() = default;
+
+  /// For nested types. Since the objects are owned by this class instance, we
+  /// skip shared pointers and just return a raw pointer
+  ArrayBuilder* child(int i) { return children_[i].get(); }
+
+  int num_children() const { return static_cast<int>(children_.size()); }
+
+  int64_t length() const { return length_; }
+  int64_t null_count() const { return null_count_; }
+  int64_t capacity() const { return capacity_; }
+
+  /// \brief Ensure that enough memory has been allocated to fit the indicated
+  /// number of total elements in the builder, including any that have already
+  /// been appended. Does not account for reallocations that may be due to
+  /// variable size data, like binary values. To make space for incremental
+  /// appends, use Reserve instead.
+  ///
+  /// \param[in] capacity the minimum number of total array values to
+  /// accommodate. Must be greater than the current capacity.
+  /// \return Status
+  virtual Status Resize(int64_t capacity);
+
+  /// \brief Ensure that there is enough space allocated to add the indicated
+  /// number of elements without any further calls to Resize. The memory
+  /// allocated is rounded up to the next highest power of 2, similar to memory
+  /// allocations in STL containers like std::vector.
+  /// \param[in] additional_capacity the number of additional array values
+  /// \return Status
+  Status Reserve(int64_t additional_capacity) {
+    auto min_capacity = length() + additional_capacity;
+    if (min_capacity <= capacity()) return Status::OK();
+
+    // leave growth factor up to BufferBuilder
+    auto new_capacity = BufferBuilder::GrowByFactor(min_capacity);
+    return Resize(new_capacity);
+  }
+
+  /// Reset the builder.
+  virtual void Reset();
+
+  /// For cases where raw data was memcpy'd into the internal buffers, allows us
+  /// to advance the length of the builder. It is your responsibility to use
+  /// this function responsibly.
+  Status Advance(int64_t elements);
+
+  /// \brief Return result of builder as an internal generic ArrayData
+  /// object. Resets builder except for dictionary builder
+  ///
+  /// \param[out] out the finalized ArrayData object
+  /// \return Status
+  virtual Status FinishInternal(std::shared_ptr<ArrayData>* out) = 0;
+
+  /// \brief Return result of builder as an Array object.
+  ///
+  /// The builder is reset except for DictionaryBuilder.
+  ///
+  /// \param[out] out the finalized Array object
+  /// \return Status
+  Status Finish(std::shared_ptr<Array>* out);
+
+  std::shared_ptr<DataType> type() const { return type_; }
+
+ protected:
+  /// Append to null bitmap
+  Status AppendToBitmap(bool is_valid);
+
+  /// Vector append. Treat each zero byte as a null. If valid_bytes is null
+  /// assume all of length bits are valid.
+  Status AppendToBitmap(const uint8_t* valid_bytes, int64_t length);
+
+  /// Set the next length bits to not null (i.e. valid).
+  Status SetNotNull(int64_t length);
+
+  // Unsafe operations (don't check capacity/don't resize)
+
+  void UnsafeAppendNull() { UnsafeAppendToBitmap(false); }
+
+  // Append to null bitmap, update the length
+  void UnsafeAppendToBitmap(bool is_valid) {
+    null_bitmap_builder_.UnsafeAppend(is_valid);
+    ++length_;
+    if (!is_valid) ++null_count_;
+  }
+
+  // Vector append. Treat each zero byte as a null. If valid_bytes is null
+  // assume all of length bits are valid.
+  void UnsafeAppendToBitmap(const uint8_t* valid_bytes, int64_t length) {
+    if (valid_bytes == NULLPTR) {
+      return UnsafeSetNotNull(length);
+    }
+    null_bitmap_builder_.UnsafeAppend(valid_bytes, length);
+    length_ += length;
+    null_count_ = null_bitmap_builder_.false_count();
+  }
+
+  void UnsafeAppendToBitmap(const std::vector<bool>& is_valid);
+
+  // Set the next length bits to not null (i.e. valid).
+  void UnsafeSetNotNull(int64_t length);
+
+  static Status TrimBuffer(const int64_t bytes_filled, ResizableBuffer* buffer);
+
+  static Status CheckCapacity(int64_t new_capacity, int64_t old_capacity) {
+    if (new_capacity < 0) {
+      return Status::Invalid("Resize capacity must be positive");
+    }
+    if (new_capacity < old_capacity) {
+      return Status::Invalid("Resize cannot downsize");
+    }
+    return Status::OK();
+  }
+
+  std::shared_ptr<DataType> type_;
+  MemoryPool* pool_;
+
+  TypedBufferBuilder<bool> null_bitmap_builder_;
+  int64_t null_count_ = 0;
+
+  // Array length, so far. Also, the index of the next element to be added
+  int64_t length_ = 0;
+  int64_t capacity_ = 0;
+
+  // Child value array builders. These are owned by this class
+  std::vector<std::unique_ptr<ArrayBuilder>> children_;
+
+ private:
+  ARROW_DISALLOW_COPY_AND_ASSIGN(ArrayBuilder);
+};
+
+} // namespace arrow
diff --git a/cpp/src/arrow/array/builder_binary.cc b/cpp/src/arrow/array/builder_binary.cc
new file mode 100644
index 0000000000000..4fef135b20348
--- /dev/null
+++ b/cpp/src/arrow/array/builder_binary.cc
@@ -0,0 +1,316 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
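A minimal sketch of the checked append path implemented below, using only names from this diff (`Append`, `AppendNull`, `Finish`):

```cpp
#include <memory>

#include "arrow/array/builder_binary.h"
#include "arrow/status.h"

arrow::Status BuildBinary(std::shared_ptr<arrow::Array>* out) {
  arrow::BinaryBuilder builder;  // default memory pool
  ARROW_RETURN_NOT_OK(builder.Append("abc", 3));
  ARROW_RETURN_NOT_OK(builder.AppendNull());
  ARROW_RETURN_NOT_OK(builder.Append("de", 2));
  return builder.Finish(out);  // [b"abc", null, b"de"]
}
```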
+
+#include "arrow/array/builder_binary.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "arrow/array.h"
+#include "arrow/buffer.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit-util.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/decimal.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+
+// ----------------------------------------------------------------------
+// String and binary
+
+BinaryBuilder::BinaryBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
+    : ArrayBuilder(type, pool), offsets_builder_(pool), value_data_builder_(pool) {}
+
+BinaryBuilder::BinaryBuilder(MemoryPool* pool) : BinaryBuilder(binary(), pool) {}
+
+Status BinaryBuilder::Resize(int64_t capacity) {
+  DCHECK_LE(capacity, kListMaximumElements);
+  RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
+
+  // one more than requested for offsets
+  RETURN_NOT_OK(offsets_builder_.Resize(capacity + 1));
+  return ArrayBuilder::Resize(capacity);
+}
+
+Status BinaryBuilder::ReserveData(int64_t elements) {
+  const int64_t size = value_data_length() + elements;
+  ARROW_RETURN_IF(
+      size > kBinaryMemoryLimit,
+      Status::CapacityError("Cannot reserve capacity larger than 2^31 - 1 for binary"));
+
+  return (size > value_data_capacity()) ? value_data_builder_.Reserve(elements)
+                                        : Status::OK();
+}
+
+Status BinaryBuilder::AppendOverflow(int64_t num_bytes) {
+  return Status::CapacityError("BinaryArray cannot contain more than ",
+                               kBinaryMemoryLimit, " bytes, have ", num_bytes);
+}
+
+Status BinaryBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
+  // Write final offset (values length)
+  RETURN_NOT_OK(AppendNextOffset());
+
+  // These buffers' padding is zeroed by BufferBuilder
+  std::shared_ptr<Buffer> offsets, value_data, null_bitmap;
+  RETURN_NOT_OK(offsets_builder_.Finish(&offsets));
+  RETURN_NOT_OK(value_data_builder_.Finish(&value_data));
+  RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
+
+  *out =
+      ArrayData::Make(type_, length_, {null_bitmap, offsets, value_data}, null_count_, 0);
+  Reset();
+  return Status::OK();
+}
+
+void BinaryBuilder::Reset() {
+  ArrayBuilder::Reset();
+  offsets_builder_.Reset();
+  value_data_builder_.Reset();
+}
+
+const uint8_t* BinaryBuilder::GetValue(int64_t i, int32_t* out_length) const {
+  const int32_t* offsets = offsets_builder_.data();
+  int32_t offset = offsets[i];
+  if (i == (length_ - 1)) {
+    *out_length = static_cast<int32_t>(value_data_builder_.length()) - offset;
+  } else {
+    *out_length = offsets[i + 1] - offset;
+  }
+  return value_data_builder_.data() + offset;
+}
+
+util::string_view BinaryBuilder::GetView(int64_t i) const {
+  const int32_t* offsets = offsets_builder_.data();
+  int32_t offset = offsets[i];
+  int32_t value_length;
+  if (i == (length_ - 1)) {
+    value_length = static_cast<int32_t>(value_data_builder_.length()) - offset;
+  } else {
+    value_length = offsets[i + 1] - offset;
+  }
+  return util::string_view(
+      reinterpret_cast<const char*>(value_data_builder_.data() + offset), value_length);
+}
+
+StringBuilder::StringBuilder(MemoryPool* pool) : BinaryBuilder(utf8(), pool) {}
+
+Status StringBuilder::AppendValues(const std::vector<std::string>& values,
+                                   const uint8_t* valid_bytes) {
+  std::size_t total_length = std::accumulate(
+      values.begin(), values.end(), 0ULL,
+      [](uint64_t sum, const std::string& str) { return sum + str.size(); });
+  RETURN_NOT_OK(Reserve(values.size()));
+  RETURN_NOT_OK(value_data_builder_.Reserve(total_length));
RETURN_NOT_OK(offsets_builder_.Reserve(values.size())); + + if (valid_bytes) { + for (std::size_t i = 0; i < values.size(); ++i) { + UnsafeAppendNextOffset(); + if (valid_bytes[i]) { + value_data_builder_.UnsafeAppend( + reinterpret_cast(values[i].data()), values[i].size()); + } + } + } else { + for (std::size_t i = 0; i < values.size(); ++i) { + UnsafeAppendNextOffset(); + value_data_builder_.UnsafeAppend(reinterpret_cast(values[i].data()), + values[i].size()); + } + } + + UnsafeAppendToBitmap(valid_bytes, values.size()); + return Status::OK(); +} + +Status StringBuilder::AppendValues(const char** values, int64_t length, + const uint8_t* valid_bytes) { + std::size_t total_length = 0; + std::vector value_lengths(length); + bool have_null_value = false; + for (int64_t i = 0; i < length; ++i) { + if (values[i]) { + auto value_length = strlen(values[i]); + value_lengths[i] = value_length; + total_length += value_length; + } else { + have_null_value = true; + } + } + RETURN_NOT_OK(Reserve(length)); + RETURN_NOT_OK(value_data_builder_.Reserve(total_length)); + RETURN_NOT_OK(offsets_builder_.Reserve(length)); + + if (valid_bytes) { + int64_t valid_bytes_offset = 0; + for (int64_t i = 0; i < length; ++i) { + UnsafeAppendNextOffset(); + if (valid_bytes[i]) { + if (values[i]) { + value_data_builder_.UnsafeAppend(reinterpret_cast(values[i]), + value_lengths[i]); + } else { + UnsafeAppendToBitmap(valid_bytes + valid_bytes_offset, i - valid_bytes_offset); + UnsafeAppendToBitmap(false); + valid_bytes_offset = i + 1; + } + } + } + UnsafeAppendToBitmap(valid_bytes + valid_bytes_offset, length - valid_bytes_offset); + } else { + if (have_null_value) { + std::vector valid_vector(length, 0); + for (int64_t i = 0; i < length; ++i) { + UnsafeAppendNextOffset(); + if (values[i]) { + value_data_builder_.UnsafeAppend(reinterpret_cast(values[i]), + value_lengths[i]); + valid_vector[i] = 1; + } + } + UnsafeAppendToBitmap(valid_vector.data(), length); + } else { + for (int64_t i = 0; i < length; ++i) { + UnsafeAppendNextOffset(); + value_data_builder_.UnsafeAppend(reinterpret_cast(values[i]), + value_lengths[i]); + } + UnsafeAppendToBitmap(nullptr, length); + } + } + return Status::OK(); +} + +// ---------------------------------------------------------------------- +// Fixed width binary + +FixedSizeBinaryBuilder::FixedSizeBinaryBuilder(const std::shared_ptr& type, + MemoryPool* pool) + : ArrayBuilder(type, pool), + byte_width_(checked_cast(*type).byte_width()), + byte_builder_(pool) {} + +#ifndef NDEBUG +void FixedSizeBinaryBuilder::CheckValueSize(int64_t size) { + DCHECK_EQ(size, byte_width_) << "Appending wrong size to FixedSizeBinaryBuilder"; +} +#endif + +Status FixedSizeBinaryBuilder::AppendValues(const uint8_t* data, int64_t length, + const uint8_t* valid_bytes) { + RETURN_NOT_OK(Reserve(length)); + UnsafeAppendToBitmap(valid_bytes, length); + return byte_builder_.Append(data, length * byte_width_); +} + +Status FixedSizeBinaryBuilder::AppendNull() { + RETURN_NOT_OK(Reserve(1)); + UnsafeAppendToBitmap(false); + return byte_builder_.Advance(byte_width_); +} + +void FixedSizeBinaryBuilder::Reset() { + ArrayBuilder::Reset(); + byte_builder_.Reset(); +} + +Status FixedSizeBinaryBuilder::Resize(int64_t capacity) { + RETURN_NOT_OK(CheckCapacity(capacity, capacity_)); + RETURN_NOT_OK(byte_builder_.Resize(capacity * byte_width_)); + return ArrayBuilder::Resize(capacity); +} + +Status FixedSizeBinaryBuilder::FinishInternal(std::shared_ptr* out) { + std::shared_ptr data; + RETURN_NOT_OK(byte_builder_.Finish(&data)); 
+ + std::shared_ptr null_bitmap; + RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap)); + *out = ArrayData::Make(type_, length_, {null_bitmap, data}, null_count_); + + capacity_ = length_ = null_count_ = 0; + return Status::OK(); +} + +const uint8_t* FixedSizeBinaryBuilder::GetValue(int64_t i) const { + const uint8_t* data_ptr = byte_builder_.data(); + return data_ptr + i * byte_width_; +} + +util::string_view FixedSizeBinaryBuilder::GetView(int64_t i) const { + const uint8_t* data_ptr = byte_builder_.data(); + return util::string_view(reinterpret_cast(data_ptr + i * byte_width_), + byte_width_); +} + +// ---------------------------------------------------------------------- +// ChunkedArray builders + +namespace internal { + +ChunkedBinaryBuilder::ChunkedBinaryBuilder(int32_t max_chunk_size, MemoryPool* pool) + : max_chunk_size_(max_chunk_size), + chunk_data_size_(0), + builder_(new BinaryBuilder(pool)) {} + +Status ChunkedBinaryBuilder::Finish(ArrayVector* out) { + if (builder_->length() > 0 || chunks_.size() == 0) { + std::shared_ptr chunk; + RETURN_NOT_OK(builder_->Finish(&chunk)); + chunks_.emplace_back(std::move(chunk)); + } + *out = std::move(chunks_); + return Status::OK(); +} + +Status ChunkedBinaryBuilder::NextChunk() { + std::shared_ptr chunk; + RETURN_NOT_OK(builder_->Finish(&chunk)); + chunks_.emplace_back(std::move(chunk)); + + chunk_data_size_ = 0; + return Status::OK(); +} + +Status ChunkedStringBuilder::Finish(ArrayVector* out) { + RETURN_NOT_OK(ChunkedBinaryBuilder::Finish(out)); + + // Change data type to string/utf8 + for (size_t i = 0; i < out->size(); ++i) { + std::shared_ptr data = (*out)[i]->data(); + data->type = ::arrow::utf8(); + (*out)[i] = std::make_shared(data); + } + return Status::OK(); +} + +} // namespace internal + +} // namespace arrow diff --git a/cpp/src/arrow/array/builder_binary.h b/cpp/src/arrow/array/builder_binary.h new file mode 100644 index 0000000000000..abd8387f8094c --- /dev/null +++ b/cpp/src/arrow/array/builder_binary.h @@ -0,0 +1,307 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
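With `builder_binary.cc` complete above, the two `StringBuilder::AppendValues` overloads are worth illustrating: the `std::vector<std::string>` form takes an optional `valid_bytes` bitmap, while the `const char**` form additionally treats a null pointer as a null slot. A minimal usage sketch, not part of the patch, assuming the public `arrow/builder.h` umbrella header re-exports these builders:

```cpp
#include <memory>
#include <string>
#include <vector>

#include "arrow/builder.h"

arrow::Status BulkAppendExample(std::shared_ptr<arrow::Array>* out) {
  arrow::StringBuilder builder;

  std::vector<std::string> vals = {"a", "bb", "ccc"};
  ARROW_RETURN_NOT_OK(builder.AppendValues(vals));  // all valid

  const char* c_vals[] = {"dd", nullptr, "ee"};  // nullptr becomes a null slot
  ARROW_RETURN_NOT_OK(builder.AppendValues(c_vals, 3));

  return builder.Finish(out);  // utf8 array: ["a", "bb", "ccc", "dd", null, "ee"]
}
```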
+ +#pragma once + +#include +#include +#include +#include +#include + +#include "arrow/array.h" +#include "arrow/array/builder_base.h" +#include "arrow/buffer-builder.h" +#include "arrow/status.h" +#include "arrow/type_traits.h" +#include "arrow/util/macros.h" +#include "arrow/util/string_view.h" + +namespace arrow { + +constexpr int64_t kBinaryMemoryLimit = std::numeric_limits::max() - 1; + +// ---------------------------------------------------------------------- +// Binary and String + +/// \class BinaryBuilder +/// \brief Builder class for variable-length binary data +class ARROW_EXPORT BinaryBuilder : public ArrayBuilder { + public: + explicit BinaryBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT); + + BinaryBuilder(const std::shared_ptr& type, MemoryPool* pool); + + Status Append(const uint8_t* value, int32_t length) { + ARROW_RETURN_NOT_OK(Reserve(1)); + ARROW_RETURN_NOT_OK(AppendNextOffset()); + ARROW_RETURN_NOT_OK(value_data_builder_.Append(value, length)); + + UnsafeAppendToBitmap(true); + return Status::OK(); + } + + Status AppendNull() { + ARROW_RETURN_NOT_OK(AppendNextOffset()); + ARROW_RETURN_NOT_OK(Reserve(1)); + UnsafeAppendToBitmap(false); + return Status::OK(); + } + + Status Append(const char* value, int32_t length) { + return Append(reinterpret_cast(value), length); + } + + Status Append(util::string_view value) { + return Append(value.data(), static_cast(value.size())); + } + + /// \brief Append without checking capacity + /// + /// Offsets and data should have been presized using Reserve() and + /// ReserveData(), respectively. + void UnsafeAppend(const uint8_t* value, int32_t length) { + UnsafeAppendNextOffset(); + value_data_builder_.UnsafeAppend(value, length); + UnsafeAppendToBitmap(true); + } + + void UnsafeAppend(const char* value, int32_t length) { + UnsafeAppend(reinterpret_cast(value), length); + } + + void UnsafeAppend(const std::string& value) { + UnsafeAppend(value.c_str(), static_cast(value.size())); + } + + void UnsafeAppendNull() { + const int64_t num_bytes = value_data_builder_.length(); + offsets_builder_.UnsafeAppend(static_cast(num_bytes)); + UnsafeAppendToBitmap(false); + } + + void Reset() override; + Status Resize(int64_t capacity) override; + + /// \brief Ensures there is enough allocated capacity to append the indicated + /// number of bytes to the value data buffer without additional allocations + Status ReserveData(int64_t elements); + + Status FinishInternal(std::shared_ptr* out) override; + + /// \return size of values buffer so far + int64_t value_data_length() const { return value_data_builder_.length(); } + /// \return capacity of values buffer + int64_t value_data_capacity() const { return value_data_builder_.capacity(); } + + /// Temporary access to a value. + /// + /// This pointer becomes invalid on the next modifying operation. + const uint8_t* GetValue(int64_t i, int32_t* out_length) const; + + /// Temporary access to a value. + /// + /// This view becomes invalid on the next modifying operation. 
+ util::string_view GetView(int64_t i) const; + + protected: + TypedBufferBuilder offsets_builder_; + TypedBufferBuilder value_data_builder_; + + Status AppendOverflow(int64_t num_bytes); + + Status AppendNextOffset() { + const int64_t num_bytes = value_data_builder_.length(); + if (ARROW_PREDICT_FALSE(num_bytes > kBinaryMemoryLimit)) { + return AppendOverflow(num_bytes); + } + return offsets_builder_.Append(static_cast(num_bytes)); + } + + void UnsafeAppendNextOffset() { + const int64_t num_bytes = value_data_builder_.length(); + offsets_builder_.UnsafeAppend(static_cast(num_bytes)); + } +}; + +/// \class StringBuilder +/// \brief Builder class for UTF8 strings +class ARROW_EXPORT StringBuilder : public BinaryBuilder { + public: + using BinaryBuilder::BinaryBuilder; + explicit StringBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT); + + using BinaryBuilder::Append; + using BinaryBuilder::Reset; + using BinaryBuilder::UnsafeAppend; + + /// \brief Append a sequence of strings in one shot. + /// + /// \param[in] values a vector of strings + /// \param[in] valid_bytes an optional sequence of bytes where non-zero + /// indicates a valid (non-null) value + /// \return Status + Status AppendValues(const std::vector& values, + const uint8_t* valid_bytes = NULLPTR); + + /// \brief Append a sequence of nul-terminated strings in one shot. + /// If one of the values is NULL, it is processed as a null + /// value even if the corresponding valid_bytes entry is 1. + /// + /// \param[in] values a contiguous C array of nul-terminated char * + /// \param[in] length the number of values to append + /// \param[in] valid_bytes an optional sequence of bytes where non-zero + /// indicates a valid (non-null) value + /// \return Status + Status AppendValues(const char** values, int64_t length, + const uint8_t* valid_bytes = NULLPTR); +}; + +// ---------------------------------------------------------------------- +// FixedSizeBinaryBuilder + +class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder { + public: + FixedSizeBinaryBuilder(const std::shared_ptr& type, + MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT); + + Status Append(const uint8_t* value) { + ARROW_RETURN_NOT_OK(Reserve(1)); + UnsafeAppendToBitmap(true); + return byte_builder_.Append(value, byte_width_); + } + + Status Append(const char* value) { + return Append(reinterpret_cast(value)); + } + + Status Append(const util::string_view& view) { +#ifndef NDEBUG + CheckValueSize(static_cast(view.size())); +#endif + return Append(reinterpret_cast(view.data())); + } + + Status Append(const std::string& s) { +#ifndef NDEBUG + CheckValueSize(static_cast(s.size())); +#endif + return Append(reinterpret_cast(s.data())); + } + + template + Status Append(const std::array& value) { + ARROW_RETURN_NOT_OK(Reserve(1)); + UnsafeAppendToBitmap(true); + return byte_builder_.Append(value); + } + + Status AppendValues(const uint8_t* data, int64_t length, + const uint8_t* valid_bytes = NULLPTR); + Status AppendNull(); + + void Reset() override; + Status Resize(int64_t capacity) override; + Status FinishInternal(std::shared_ptr* out) override; + + /// \return size of values buffer so far + int64_t value_data_length() const { return byte_builder_.length(); } + + int32_t byte_width() const { return byte_width_; } + + /// Temporary access to a value. + /// + /// This pointer becomes invalid on the next modifying operation. + const uint8_t* GetValue(int64_t i) const; + + /// Temporary access to a value. + /// + /// This view becomes invalid on the next modifying operation. 
+ util::string_view GetView(int64_t i) const; + + protected: + int32_t byte_width_; + BufferBuilder byte_builder_; + +#ifndef NDEBUG + void CheckValueSize(int64_t size); +#endif +}; + +// ---------------------------------------------------------------------- +// Chunked builders: build a sequence of BinaryArray or StringArray that are +// limited to a particular size (to the upper limit of 2GB) + +namespace internal { + +class ARROW_EXPORT ChunkedBinaryBuilder { + public: + ChunkedBinaryBuilder(int32_t max_chunk_size, + MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT); + + virtual ~ChunkedBinaryBuilder() = default; + + Status Append(const uint8_t* value, int32_t length) { + if (ARROW_PREDICT_FALSE(length + chunk_data_size_ > max_chunk_size_)) { + // Move onto next chunk, unless the builder length is currently 0, which + // means that max_chunk_size_ is less than the item length + if (builder_->length() > 0) { + ARROW_RETURN_NOT_OK(NextChunk()); + } + // else fall through + } + + chunk_data_size_ += length; + return builder_->Append(value, length); + } + + Status Append(const util::string_view& value) { + return Append(reinterpret_cast(value.data()), + static_cast(value.size())); + } + + Status AppendNull() { + if (ARROW_PREDICT_FALSE(builder_->length() == std::numeric_limits::max())) { + ARROW_RETURN_NOT_OK(NextChunk()); + } + return builder_->AppendNull(); + } + + Status Reserve(int64_t values) { return builder_->Reserve(values); } + + virtual Status Finish(ArrayVector* out); + + protected: + Status NextChunk(); + + int32_t max_chunk_size_; + int32_t chunk_data_size_; + + std::unique_ptr builder_; + std::vector> chunks_; +}; + +class ARROW_EXPORT ChunkedStringBuilder : public ChunkedBinaryBuilder { + public: + using ChunkedBinaryBuilder::ChunkedBinaryBuilder; + + Status Finish(ArrayVector* out) override; +}; + +} // namespace internal + +} // namespace arrow diff --git a/cpp/src/arrow/array/builder_decimal.cc b/cpp/src/arrow/array/builder_decimal.cc new file mode 100644 index 0000000000000..191a0ff014078 --- /dev/null +++ b/cpp/src/arrow/array/builder_decimal.cc @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
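The chunked builders declared above cap each emitted array at `max_chunk_size` value bytes, rolling over to a fresh chunk inside `Append`; `ChunkedStringBuilder::Finish` then retags every chunk as `utf8`. A sketch of the intended use, under the assumption that the `arrow::internal` API shown here is called as declared:

```cpp
#include "arrow/array/builder_binary.h"

arrow::Status BuildChunks(arrow::ArrayVector* chunks) {
  // 16-byte chunk budget; uses the default memory pool.
  arrow::internal::ChunkedStringBuilder builder(/*max_chunk_size=*/16);
  for (int i = 0; i < 10; ++i) {
    ARROW_RETURN_NOT_OK(builder.Append("four"));  // 4 value bytes apiece
  }
  // Emits the trailing partial chunk and retags each chunk as utf8.
  return builder.Finish(chunks);
}
```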
+ +#include "arrow/array/builder_decimal.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "arrow/array.h" +#include "arrow/buffer.h" +#include "arrow/status.h" +#include "arrow/type.h" +#include "arrow/type_traits.h" +#include "arrow/util/bit-util.h" +#include "arrow/util/checked_cast.h" +#include "arrow/util/decimal.h" +#include "arrow/util/logging.h" + +namespace arrow { + +// ---------------------------------------------------------------------- +// Decimal128Builder + +Decimal128Builder::Decimal128Builder(const std::shared_ptr& type, + MemoryPool* pool) + : FixedSizeBinaryBuilder(type, pool) {} + +Status Decimal128Builder::Append(const Decimal128& value) { + RETURN_NOT_OK(FixedSizeBinaryBuilder::Reserve(1)); + return FixedSizeBinaryBuilder::Append(value.ToBytes()); +} + +Status Decimal128Builder::FinishInternal(std::shared_ptr* out) { + std::shared_ptr data; + RETURN_NOT_OK(byte_builder_.Finish(&data)); + std::shared_ptr null_bitmap; + RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap)); + + *out = ArrayData::Make(type_, length_, {null_bitmap, data}, null_count_); + + return Status::OK(); +} + +} // namespace arrow diff --git a/cpp/src/arrow/array/builder_decimal.h b/cpp/src/arrow/array/builder_decimal.h new file mode 100644 index 0000000000000..fb40a7950abbd --- /dev/null +++ b/cpp/src/arrow/array/builder_decimal.h @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include "arrow/array/builder_base.h" +#include "arrow/array/builder_binary.h" + +namespace arrow { + +class Decimal128; + +class ARROW_EXPORT Decimal128Builder : public FixedSizeBinaryBuilder { + public: + explicit Decimal128Builder(const std::shared_ptr& type, + MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT); + + using FixedSizeBinaryBuilder::Append; + using FixedSizeBinaryBuilder::AppendValues; + using FixedSizeBinaryBuilder::Reset; + + Status Append(const Decimal128& val); + + Status FinishInternal(std::shared_ptr* out) override; +}; + +using DecimalBuilder = Decimal128Builder; + +} // namespace arrow diff --git a/cpp/src/arrow/array/builder_dict.cc b/cpp/src/arrow/array/builder_dict.cc new file mode 100644 index 0000000000000..cfc3d3d4b1a05 --- /dev/null +++ b/cpp/src/arrow/array/builder_dict.cc @@ -0,0 +1,332 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/array/builder_dict.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "arrow/array.h" +#include "arrow/buffer.h" +#include "arrow/status.h" +#include "arrow/type.h" +#include "arrow/type_traits.h" +#include "arrow/util/checked_cast.h" +#include "arrow/util/hashing.h" +#include "arrow/util/logging.h" +#include "arrow/visitor_inline.h" + +namespace arrow { + +using internal::checked_cast; + +// ---------------------------------------------------------------------- +// DictionaryType unification + +struct UnifyDictionaryValues { + MemoryPool* pool_; + std::shared_ptr value_type_; + const std::vector& types_; + std::shared_ptr* out_values_; + std::vector>* out_transpose_maps_; + + Status Visit(const DataType&, void* = nullptr) { + // Default implementation for non-dictionary-supported datatypes + std::stringstream ss; + ss << "Unification of " << value_type_->ToString() + << " dictionaries is not implemented"; + return Status::NotImplemented(ss.str()); + } + + template + Status Visit(const T&, + typename internal::DictionaryTraits::MemoTableType* = nullptr) { + using ArrayType = typename TypeTraits::ArrayType; + using DictTraits = typename internal::DictionaryTraits; + using MemoTableType = typename DictTraits::MemoTableType; + + MemoTableType memo_table; + if (out_transpose_maps_ != nullptr) { + out_transpose_maps_->clear(); + out_transpose_maps_->reserve(types_.size()); + } + // Build up the unified dictionary values and the transpose maps + for (const auto& type : types_) { + const ArrayType& values = checked_cast(*type->dictionary()); + if (out_transpose_maps_ != nullptr) { + std::vector transpose_map; + transpose_map.reserve(values.length()); + for (int64_t i = 0; i < values.length(); ++i) { + int32_t dict_index = memo_table.GetOrInsert(values.GetView(i)); + transpose_map.push_back(dict_index); + } + out_transpose_maps_->push_back(std::move(transpose_map)); + } else { + for (int64_t i = 0; i < values.length(); ++i) { + memo_table.GetOrInsert(values.GetView(i)); + } + } + } + // Build unified dictionary array + std::shared_ptr data; + RETURN_NOT_OK(DictTraits::GetDictionaryArrayData(pool_, value_type_, memo_table, + 0 /* start_offset */, &data)); + *out_values_ = MakeArray(data); + return Status::OK(); + } +}; + +Status DictionaryType::Unify(MemoryPool* pool, const std::vector& types, + std::shared_ptr* out_type, + std::vector>* out_transpose_maps) { + if (types.size() == 0) { + return Status::Invalid("need at least one input type"); + } + std::vector dict_types; + dict_types.reserve(types.size()); + for (const auto& type : types) { + if (type->id() != Type::DICTIONARY) { + return Status::TypeError("input types must be dictionary types"); + } + dict_types.push_back(checked_cast(type)); + } + + // XXX Should we check the ordered flag? 
+ auto value_type = dict_types[0]->dictionary()->type(); + for (const auto& type : dict_types) { + auto values = type->dictionary(); + if (!values->type()->Equals(value_type)) { + return Status::TypeError("input types have different value types"); + } + if (values->null_count() != 0) { + return Status::TypeError("input types have null values"); + } + } + + std::shared_ptr values; + { + UnifyDictionaryValues visitor{pool, value_type, dict_types, &values, + out_transpose_maps}; + RETURN_NOT_OK(VisitTypeInline(*value_type, &visitor)); + } + + // Build unified dictionary type with the right index type + std::shared_ptr index_type; + if (values->length() <= std::numeric_limits::max()) { + index_type = int8(); + } else if (values->length() <= std::numeric_limits::max()) { + index_type = int16(); + } else if (values->length() <= std::numeric_limits::max()) { + index_type = int32(); + } else { + index_type = int64(); + } + *out_type = arrow::dictionary(index_type, values); + return Status::OK(); +} + +// ---------------------------------------------------------------------- +// DictionaryBuilder + +template +class DictionaryBuilder::MemoTableImpl + : public internal::HashTraits::MemoTableType { + public: + using MemoTableType = typename internal::HashTraits::MemoTableType; + using MemoTableType::MemoTableType; +}; + +template +DictionaryBuilder::~DictionaryBuilder() {} + +template +DictionaryBuilder::DictionaryBuilder(const std::shared_ptr& type, + MemoryPool* pool) + : ArrayBuilder(type, pool), + memo_table_(new MemoTableImpl(0)), + delta_offset_(0), + byte_width_(-1), + values_builder_(pool) { + DCHECK_EQ(T::type_id, type->id()) << "inconsistent type passed to DictionaryBuilder"; +} + +DictionaryBuilder::DictionaryBuilder(const std::shared_ptr& type, + MemoryPool* pool) + : ArrayBuilder(type, pool), values_builder_(pool) { + DCHECK_EQ(Type::NA, type->id()) << "inconsistent type passed to DictionaryBuilder"; +} + +template <> +DictionaryBuilder::DictionaryBuilder( + const std::shared_ptr& type, MemoryPool* pool) + : ArrayBuilder(type, pool), + memo_table_(new MemoTableImpl(0)), + delta_offset_(0), + byte_width_(checked_cast(*type).byte_width()) {} + +template +void DictionaryBuilder::Reset() { + ArrayBuilder::Reset(); + values_builder_.Reset(); + memo_table_.reset(new MemoTableImpl(0)); + delta_offset_ = 0; +} + +template +Status DictionaryBuilder::Resize(int64_t capacity) { + RETURN_NOT_OK(CheckCapacity(capacity, capacity_)); + capacity = std::max(capacity, kMinBuilderCapacity); + + if (capacity_ == 0) { + // Initialize hash table + // XXX should we let the user pass additional size heuristics? 
+ delta_offset_ = 0; + } + RETURN_NOT_OK(values_builder_.Resize(capacity)); + return ArrayBuilder::Resize(capacity); +} + +Status DictionaryBuilder::Resize(int64_t capacity) { + RETURN_NOT_OK(CheckCapacity(capacity, capacity_)); + capacity = std::max(capacity, kMinBuilderCapacity); + + RETURN_NOT_OK(values_builder_.Resize(capacity)); + return ArrayBuilder::Resize(capacity); +} + +template +Status DictionaryBuilder::Append(const Scalar& value) { + RETURN_NOT_OK(Reserve(1)); + + auto memo_index = memo_table_->GetOrInsert(value); + RETURN_NOT_OK(values_builder_.Append(memo_index)); + length_ += 1; + + return Status::OK(); +} + +template +Status DictionaryBuilder::AppendNull() { + length_ += 1; + null_count_ += 1; + + return values_builder_.AppendNull(); +} + +Status DictionaryBuilder::AppendNull() { + length_ += 1; + null_count_ += 1; + + return values_builder_.AppendNull(); +} + +template +Status DictionaryBuilder::AppendArray(const Array& array) { + using ArrayType = typename TypeTraits::ArrayType; + + const auto& concrete_array = checked_cast(array); + for (int64_t i = 0; i < array.length(); i++) { + if (array.IsNull(i)) { + RETURN_NOT_OK(AppendNull()); + } else { + RETURN_NOT_OK(Append(concrete_array.GetView(i))); + } + } + return Status::OK(); +} + +template <> +Status DictionaryBuilder::AppendArray(const Array& array) { + if (!type_->Equals(*array.type())) { + return Status::Invalid("Cannot append FixedSizeBinary array with non-matching type"); + } + + const auto& typed_array = checked_cast(array); + for (int64_t i = 0; i < array.length(); i++) { + if (array.IsNull(i)) { + RETURN_NOT_OK(AppendNull()); + } else { + RETURN_NOT_OK(Append(typed_array.GetValue(i))); + } + } + return Status::OK(); +} + +Status DictionaryBuilder::AppendArray(const Array& array) { + for (int64_t i = 0; i < array.length(); i++) { + RETURN_NOT_OK(AppendNull()); + } + return Status::OK(); +} + +template +Status DictionaryBuilder::FinishInternal(std::shared_ptr* out) { + // Finalize indices array + RETURN_NOT_OK(values_builder_.FinishInternal(out)); + + // Generate dictionary array from hash table contents + std::shared_ptr dictionary; + std::shared_ptr dictionary_data; + + RETURN_NOT_OK(internal::DictionaryTraits::GetDictionaryArrayData( + pool_, type_, *memo_table_, delta_offset_, &dictionary_data)); + dictionary = MakeArray(dictionary_data); + + // Set type of array data to the right dictionary type + (*out)->type = std::make_shared((*out)->type, dictionary); + + // Update internals for further uses of this DictionaryBuilder + delta_offset_ = memo_table_->size(); + values_builder_.Reset(); + + return Status::OK(); +} + +Status DictionaryBuilder::FinishInternal(std::shared_ptr* out) { + std::shared_ptr dictionary = std::make_shared(0); + + RETURN_NOT_OK(values_builder_.FinishInternal(out)); + (*out)->type = std::make_shared((*out)->type, dictionary); + + return Status::OK(); +} + +template class DictionaryBuilder; +template class DictionaryBuilder; +template class DictionaryBuilder; +template class DictionaryBuilder; +template class DictionaryBuilder; +template class DictionaryBuilder; +template class DictionaryBuilder; +template class DictionaryBuilder; +template class DictionaryBuilder; +template class DictionaryBuilder; +template class DictionaryBuilder; +template class DictionaryBuilder; +template class DictionaryBuilder; +template class DictionaryBuilder; +template class DictionaryBuilder; +template class DictionaryBuilder; +template class DictionaryBuilder; +template class DictionaryBuilder; + +} // 
namespace arrow
diff --git a/cpp/src/arrow/array/builder_dict.h b/cpp/src/arrow/array/builder_dict.h
new file mode 100644
index 0000000000000..6f0271683aea2
--- /dev/null
+++ b/cpp/src/arrow/array/builder_dict.h
@@ -0,0 +1,167 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+
+#include "arrow/array/builder_adaptive.h"  // IWYU pragma: export
+#include "arrow/array/builder_base.h"      // IWYU pragma: export
+
+namespace arrow {
+
+// ----------------------------------------------------------------------
+// Dictionary builder
+
+namespace internal {
+
+template <typename T>
+struct DictionaryScalar {
+  using type = typename T::c_type;
+};
+
+template <>
+struct DictionaryScalar<BinaryType> {
+  using type = util::string_view;
+};
+
+template <>
+struct DictionaryScalar<StringType> {
+  using type = util::string_view;
+};
+
+template <>
+struct DictionaryScalar<FixedSizeBinaryType> {
+  using type = util::string_view;
+};
+
+}  // namespace internal
+
+/// \brief Array builder for creating an encoded DictionaryArray from dense
+/// array data
+///
+/// Unlike other builders, dictionary builder does not completely reset the state
+/// on Finish calls. The arrays built after the initial Finish call will reuse
+/// the previously created encoding and build a delta dictionary when new terms
+/// occur.
+template <typename T>
+class ARROW_EXPORT DictionaryBuilder : public ArrayBuilder {
+ public:
+  using Scalar = typename internal::DictionaryScalar<T>::type;
+
+  // WARNING: the type given below is the value type, not the DictionaryType.
+  // The DictionaryType is instantiated on the Finish() call.
+ DictionaryBuilder(const std::shared_ptr& type, MemoryPool* pool); + + template + explicit DictionaryBuilder( + typename std::enable_if::is_parameter_free, MemoryPool*>::type pool) + : DictionaryBuilder(TypeTraits::type_singleton(), pool) {} + + ~DictionaryBuilder() override; + + /// \brief Append a scalar value + Status Append(const Scalar& value); + + /// \brief Append a fixed-width string (only for FixedSizeBinaryType) + template + Status Append(typename std::enable_if::value, + const uint8_t*>::type value) { + return Append(util::string_view(reinterpret_cast(value), byte_width_)); + } + + /// \brief Append a fixed-width string (only for FixedSizeBinaryType) + template + Status Append(typename std::enable_if::value, + const char*>::type value) { + return Append(util::string_view(value, byte_width_)); + } + + /// \brief Append a scalar null value + Status AppendNull(); + + /// \brief Append a whole dense array to the builder + Status AppendArray(const Array& array); + + void Reset() override; + Status Resize(int64_t capacity) override; + Status FinishInternal(std::shared_ptr* out) override; + + /// is the dictionary builder in the delta building mode + bool is_building_delta() { return delta_offset_ > 0; } + + protected: + class MemoTableImpl; + std::unique_ptr memo_table_; + + int32_t delta_offset_; + // Only used for FixedSizeBinaryType + int32_t byte_width_; + + AdaptiveIntBuilder values_builder_; +}; + +template <> +class ARROW_EXPORT DictionaryBuilder : public ArrayBuilder { + public: + DictionaryBuilder(const std::shared_ptr& type, MemoryPool* pool); + explicit DictionaryBuilder(MemoryPool* pool); + + /// \brief Append a scalar null value + Status AppendNull(); + + /// \brief Append a whole dense array to the builder + Status AppendArray(const Array& array); + + Status Resize(int64_t capacity) override; + Status FinishInternal(std::shared_ptr* out) override; + + protected: + AdaptiveIntBuilder values_builder_; +}; + +class ARROW_EXPORT BinaryDictionaryBuilder : public DictionaryBuilder { + public: + using DictionaryBuilder::Append; + using DictionaryBuilder::DictionaryBuilder; + + Status Append(const uint8_t* value, int32_t length) { + return Append(reinterpret_cast(value), length); + } + + Status Append(const char* value, int32_t length) { + return Append(util::string_view(value, length)); + } +}; + +/// \brief Dictionary array builder with convenience methods for strings +class ARROW_EXPORT StringDictionaryBuilder : public DictionaryBuilder { + public: + using DictionaryBuilder::Append; + using DictionaryBuilder::DictionaryBuilder; + + Status Append(const uint8_t* value, int32_t length) { + return Append(reinterpret_cast(value), length); + } + + Status Append(const char* value, int32_t length) { + return Append(util::string_view(value, length)); + } +}; + +} // namespace arrow diff --git a/cpp/src/arrow/array/builder_nested.cc b/cpp/src/arrow/array/builder_nested.cc new file mode 100644 index 0000000000000..46637713c3e0f --- /dev/null +++ b/cpp/src/arrow/array/builder_nested.cc @@ -0,0 +1,174 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
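The delta-dictionary behaviour described in the class comment is the key design point of this builder: the memo table survives `Finish`, so later batches only carry newly seen values. A sketch using the `StringDictionaryBuilder` declared above:

```cpp
#include <memory>

#include "arrow/builder.h"

arrow::Status DeltaExample() {
  arrow::StringDictionaryBuilder builder(arrow::default_memory_pool());
  std::shared_ptr<arrow::Array> batch1, batch2;

  ARROW_RETURN_NOT_OK(builder.Append("a"));
  ARROW_RETURN_NOT_OK(builder.Append("b"));
  ARROW_RETURN_NOT_OK(builder.Finish(&batch1));  // dictionary: ["a", "b"]

  ARROW_RETURN_NOT_OK(builder.Append("b"));      // already memoized
  ARROW_RETURN_NOT_OK(builder.Append("c"));      // new term
  ARROW_RETURN_NOT_OK(builder.Finish(&batch2));  // delta dictionary: ["c"]
  return arrow::Status::OK();
}
```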
You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/array/builder_nested.h"
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/buffer.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit-util.h"
+#include "arrow/util/int-util.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+// ----------------------------------------------------------------------
+// ListBuilder
+
+ListBuilder::ListBuilder(MemoryPool* pool,
+                         std::shared_ptr<ArrayBuilder> const& value_builder,
+                         const std::shared_ptr<DataType>& type)
+    : ArrayBuilder(type ? type
+                        : std::static_pointer_cast<DataType>(
+                              std::make_shared<ListType>(value_builder->type())),
+                   pool),
+      offsets_builder_(pool),
+      value_builder_(value_builder) {}
+
+Status ListBuilder::AppendValues(const int32_t* offsets, int64_t length,
+                                 const uint8_t* valid_bytes) {
+  RETURN_NOT_OK(Reserve(length));
+  UnsafeAppendToBitmap(valid_bytes, length);
+  offsets_builder_.UnsafeAppend(offsets, length);
+  return Status::OK();
+}
+
+Status ListBuilder::AppendNextOffset() {
+  const int64_t num_values = value_builder_->length();
+  ARROW_RETURN_IF(
+      num_values > kListMaximumElements,
+      Status::CapacityError("ListArray cannot contain more than 2^31 - 1 child elements,",
+                            " have ", num_values));
+  return offsets_builder_.Append(static_cast<int32_t>(num_values));
+}
+
+Status ListBuilder::Append(bool is_valid) {
+  RETURN_NOT_OK(Reserve(1));
+  UnsafeAppendToBitmap(is_valid);
+  return AppendNextOffset();
+}
+
+Status ListBuilder::Resize(int64_t capacity) {
+  DCHECK_LE(capacity, kListMaximumElements);
+  RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
+
+  // one more than requested for offsets
+  RETURN_NOT_OK(offsets_builder_.Resize(capacity + 1));
+  return ArrayBuilder::Resize(capacity);
+}
+
+Status ListBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
+  RETURN_NOT_OK(AppendNextOffset());
+
+  // Offset padding zeroed by BufferBuilder
+  std::shared_ptr<Buffer> offsets;
+  RETURN_NOT_OK(offsets_builder_.Finish(&offsets));
+
+  std::shared_ptr<ArrayData> items;
+  if (values_) {
+    items = values_->data();
+  } else {
+    if (value_builder_->length() == 0) {
+      // Try to make sure we get a non-null values buffer (ARROW-2744)
+      RETURN_NOT_OK(value_builder_->Resize(0));
+    }
+    RETURN_NOT_OK(value_builder_->FinishInternal(&items));
+  }
+
+  // If the type has not been specified in the constructor, infer it
+  // This is the case if the value_builder contains a DenseUnionBuilder
+  if (!arrow::internal::checked_cast<const ListType&>(*type_).value_type()) {
+    type_ = std::static_pointer_cast<DataType>(
+        std::make_shared<ListType>(value_builder_->type()));
+  }
+  std::shared_ptr<Buffer> null_bitmap;
+  RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
+  *out = ArrayData::Make(type_, length_, {null_bitmap, offsets}, null_count_);
+  (*out)->child_data.emplace_back(std::move(items));
+  Reset();
+  return Status::OK();
+}
+
+void ListBuilder::Reset() {
+  ArrayBuilder::Reset();
+  values_.reset();
+  offsets_builder_.Reset();
+  value_builder_->Reset();
+}
+
+ArrayBuilder* ListBuilder::value_builder() const {
+  DCHECK(!values_) << "Using value builder is pointless when values_ is set";
+  return value_builder_.get();
+}
+
+// ----------------------------------------------------------------------
+// Struct
+
+StructBuilder::StructBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool,
+                             std::vector<std::shared_ptr<ArrayBuilder>>&& field_builders)
+    : ArrayBuilder(type, pool) {
+  children_ = std::move(field_builders);
+}
+
+void StructBuilder::Reset() {
+  ArrayBuilder::Reset();
+  for (const auto& field_builder : children_) {
+    field_builder->Reset();
+  }
+}
+
+Status StructBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
+  std::shared_ptr<Buffer> null_bitmap;
+  RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
+
+  std::vector<std::shared_ptr<ArrayData>> child_data(children_.size());
+  for (size_t i = 0; i < children_.size(); ++i) {
+    if (length_ == 0) {
+      // Try to make sure the child buffers are initialized
+      RETURN_NOT_OK(children_[i]->Resize(0));
+    }
+    RETURN_NOT_OK(children_[i]->FinishInternal(&child_data[i]));
+  }
+
+  // If the type has not been specified in the constructor, infer it
+  // This is the case if one of the children contains a DenseUnionBuilder
+  if (!type_) {
+    std::vector<std::shared_ptr<Field>> fields;
+    for (const auto& field_builder : children_) {
+      fields.push_back(field("", field_builder->type()));
+    }
+    type_ = struct_(fields);
+  }
+
+  *out = ArrayData::Make(type_, length_, {null_bitmap}, null_count_);
+  (*out)->child_data = std::move(child_data);
+
+  capacity_ = length_ = null_count_ = 0;
+  return Status::OK();
+}
+
+}  // namespace arrow
diff --git a/cpp/src/arrow/array/builder_nested.h b/cpp/src/arrow/array/builder_nested.h
new file mode 100644
index 0000000000000..19b0ad81b5a16
--- /dev/null
+++ b/cpp/src/arrow/array/builder_nested.h
@@ -0,0 +1,122 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include "arrow/array/builder_base.h"
+#include "arrow/buffer-builder.h"
+
+namespace arrow {
+
+// ----------------------------------------------------------------------
+// List builder
+
+/// \class ListBuilder
+/// \brief Builder class for variable-length list array value types
+///
+/// To use this class, you must append values to the child array builder and
+/// use the Append function to delimit each distinct list value (once the
+/// values have been appended to the child array) or use the bulk API to
+/// append a sequence of offsets and null values.
+///
+/// A note on types. Per arrow/type.h all types in the c++ implementation are
+/// logical so even though this class always builds list arrays, it can
+/// represent multiple different logical types. If no logical type is provided
+/// at construction time, the class defaults to List<T> where T is taken from
+/// the value_builder/values that the object is constructed with.
+class ARROW_EXPORT ListBuilder : public ArrayBuilder {
+ public:
+  /// Use this constructor to incrementally build the value array along with offsets and
+  /// null bitmap.
+  ListBuilder(MemoryPool* pool, std::shared_ptr<ArrayBuilder> const& value_builder,
+              const std::shared_ptr<DataType>& type = NULLPTR);
+
+  Status Resize(int64_t capacity) override;
+  void Reset() override;
+  Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
+
+  /// \brief Vector append
+  ///
+  /// If passed, valid_bytes is of equal length to values, and any zero byte
+  /// will be considered as a null for that slot
+  Status AppendValues(const int32_t* offsets, int64_t length,
+                      const uint8_t* valid_bytes = NULLPTR);
+
+  /// \brief Start a new variable-length list slot
+  ///
+  /// This function should be called before beginning to append elements to the
+  /// value builder
+  Status Append(bool is_valid = true);
+
+  Status AppendNull() { return Append(false); }
+
+  ArrayBuilder* value_builder() const;
+
+ protected:
+  TypedBufferBuilder<int32_t> offsets_builder_;
+  std::shared_ptr<ArrayBuilder> value_builder_;
+  std::shared_ptr<Array> values_;
+
+  Status AppendNextOffset();
+};
+
+// ----------------------------------------------------------------------
+// Struct
+
+// ---------------------------------------------------------------------------------
+// StructArray builder
+/// Append, Resize and Reserve methods act on the StructBuilder itself.
+/// Please make sure these methods are called consistently on all child
+/// builders to maintain data-structure consistency.
+class ARROW_EXPORT StructBuilder : public ArrayBuilder {
+ public:
+  StructBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool,
+                std::vector<std::shared_ptr<ArrayBuilder>>&& field_builders);
+
+  Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
+
+  /// Null bitmap is of equal length to every child field, and any zero byte
+  /// will be considered as a null for that field, but users must use the
+  /// append or advance methods of the child builders independently to
+  /// insert data.
+  Status AppendValues(int64_t length, const uint8_t* valid_bytes) {
+    ARROW_RETURN_NOT_OK(Reserve(length));
+    UnsafeAppendToBitmap(valid_bytes, length);
+    return Status::OK();
+  }
+
+  /// Append an element to the Struct. All child builders' Append methods must
+  /// be called independently to maintain data-structure consistency.
+  Status Append(bool is_valid = true) {
+    ARROW_RETURN_NOT_OK(Reserve(1));
+    UnsafeAppendToBitmap(is_valid);
+    return Status::OK();
+  }
+
+  Status AppendNull() { return Append(false); }
+
+  void Reset() override;
+
+  ArrayBuilder* field_builder(int i) const { return children_[i].get(); }
+
+  int num_fields() const { return static_cast<int>(children_.size()); }
+};
+
+}  // namespace arrow
diff --git a/cpp/src/arrow/array/builder_primitive.cc b/cpp/src/arrow/array/builder_primitive.cc
new file mode 100644
index 0000000000000..a593f362dd29a
--- /dev/null
+++ b/cpp/src/arrow/array/builder_primitive.cc
@@ -0,0 +1,273 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.
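To make the ListBuilder contract concrete: values go into the child builder, while `Append`/`AppendNull` on the ListBuilder delimit the list slots. A sketch, not from the patch, building `[[1, 2], null, [3]]`; StructBuilder follows the same discipline, with its `Append` setting struct validity while each field's child builder is appended to independently:

```cpp
#include <memory>

#include "arrow/builder.h"

arrow::Status ListExample(std::shared_ptr<arrow::Array>* out) {
  auto pool = arrow::default_memory_pool();
  auto value_builder = std::make_shared<arrow::Int64Builder>(pool);
  arrow::ListBuilder list_builder(pool, value_builder);

  ARROW_RETURN_NOT_OK(list_builder.Append());      // start slot [1, 2]
  ARROW_RETURN_NOT_OK(value_builder->Append(1));
  ARROW_RETURN_NOT_OK(value_builder->Append(2));
  ARROW_RETURN_NOT_OK(list_builder.AppendNull());  // null slot
  ARROW_RETURN_NOT_OK(list_builder.Append());      // start slot [3]
  ARROW_RETURN_NOT_OK(value_builder->Append(3));
  return list_builder.Finish(out);
}
```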
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/array/builder_primitive.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "arrow/array.h" +#include "arrow/buffer.h" +#include "arrow/status.h" +#include "arrow/type.h" +#include "arrow/type_traits.h" +#include "arrow/util/bit-util.h" +#include "arrow/util/int-util.h" +#include "arrow/util/logging.h" + +namespace arrow { + +// ---------------------------------------------------------------------- +// Null builder + +Status NullBuilder::FinishInternal(std::shared_ptr* out) { + *out = ArrayData::Make(null(), length_, {nullptr}, length_); + length_ = null_count_ = 0; + return Status::OK(); +} + +// ---------------------------------------------------------------------- + +template +void PrimitiveBuilder::Reset() { + data_.reset(); + raw_data_ = nullptr; +} + +template +Status PrimitiveBuilder::Resize(int64_t capacity) { + RETURN_NOT_OK(CheckCapacity(capacity, capacity_)); + capacity = std::max(capacity, kMinBuilderCapacity); + + int64_t nbytes = TypeTraits::bytes_required(capacity); + if (capacity_ == 0) { + RETURN_NOT_OK(AllocateResizableBuffer(pool_, nbytes, &data_)); + } else { + RETURN_NOT_OK(data_->Resize(nbytes)); + } + + raw_data_ = reinterpret_cast(data_->mutable_data()); + return ArrayBuilder::Resize(capacity); +} + +template +Status PrimitiveBuilder::AppendValues(const value_type* values, int64_t length, + const uint8_t* valid_bytes) { + RETURN_NOT_OK(Reserve(length)); + + if (length > 0) { + std::memcpy(raw_data_ + length_, values, + static_cast(TypeTraits::bytes_required(length))); + } + + // length_ is update by these + ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length); + return Status::OK(); +} + +template +Status PrimitiveBuilder::AppendValues(const value_type* values, int64_t length, + const std::vector& is_valid) { + RETURN_NOT_OK(Reserve(length)); + DCHECK_EQ(length, static_cast(is_valid.size())); + + if (length > 0) { + std::memcpy(raw_data_ + length_, values, + static_cast(TypeTraits::bytes_required(length))); + } + + // length_ is update by these + ArrayBuilder::UnsafeAppendToBitmap(is_valid); + return Status::OK(); +} + +template +Status PrimitiveBuilder::AppendValues(const std::vector& values, + const std::vector& is_valid) { + return AppendValues(values.data(), static_cast(values.size()), is_valid); +} + +template +Status PrimitiveBuilder::AppendValues(const std::vector& values) { + return AppendValues(values.data(), static_cast(values.size())); +} + +template +Status PrimitiveBuilder::FinishInternal(std::shared_ptr* out) { + RETURN_NOT_OK(TrimBuffer(TypeTraits::bytes_required(length_), data_.get())); + std::shared_ptr null_bitmap; + RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap)); + *out = ArrayData::Make(type_, length_, {null_bitmap, data_}, null_count_); + + data_ = nullptr; + capacity_ = length_ = null_count_ = 0; + + return Status::OK(); +} + +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; 
+template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; + +BooleanBuilder::BooleanBuilder(MemoryPool* pool) + : ArrayBuilder(boolean(), pool), data_(nullptr), raw_data_(nullptr) {} + +BooleanBuilder::BooleanBuilder(const std::shared_ptr& type, MemoryPool* pool) + : BooleanBuilder(pool) { + DCHECK_EQ(Type::BOOL, type->id()); +} + +void BooleanBuilder::Reset() { + ArrayBuilder::Reset(); + data_.reset(); + raw_data_ = nullptr; +} + +Status BooleanBuilder::Resize(int64_t capacity) { + RETURN_NOT_OK(CheckCapacity(capacity, capacity_)); + capacity = std::max(capacity, kMinBuilderCapacity); + + const int64_t new_bitmap_size = BitUtil::BytesForBits(capacity); + if (capacity_ == 0) { + RETURN_NOT_OK(AllocateResizableBuffer(pool_, new_bitmap_size, &data_)); + raw_data_ = reinterpret_cast(data_->mutable_data()); + + // We zero the memory for booleans to keep things simple; for some reason if + // we do not, even though we may write every bit (through in-place | or &), + // valgrind will still show a warning. If we do not zero the bytes here, we + // will have to be careful to zero them in AppendNull and AppendNulls. Also, + // zeroing the bits results in deterministic bits when each byte may have a + // mix of nulls and not nulls. + // + // We only zero up to new_bitmap_size because the padding was zeroed by + // AllocateResizableBuffer + memset(raw_data_, 0, static_cast(new_bitmap_size)); + } else { + const int64_t old_bitmap_capacity = data_->capacity(); + RETURN_NOT_OK(data_->Resize(new_bitmap_size)); + const int64_t new_bitmap_capacity = data_->capacity(); + raw_data_ = reinterpret_cast(data_->mutable_data()); + + // See comment above about why we zero memory for booleans + memset(raw_data_ + old_bitmap_capacity, 0, + static_cast(new_bitmap_capacity - old_bitmap_capacity)); + } + + return ArrayBuilder::Resize(capacity); +} + +Status BooleanBuilder::FinishInternal(std::shared_ptr* out) { + int64_t bit_offset = length_ % 8; + if (bit_offset > 0) { + // Adjust last byte + data_->mutable_data()[length_ / 8] &= BitUtil::kPrecedingBitmask[bit_offset]; + } + + std::shared_ptr null_bitmap; + RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap)); + RETURN_NOT_OK(TrimBuffer(BitUtil::BytesForBits(length_), data_.get())); + + *out = ArrayData::Make(boolean(), length_, {null_bitmap, data_}, null_count_); + + data_ = nullptr; + capacity_ = length_ = null_count_ = 0; + return Status::OK(); +} + +Status BooleanBuilder::AppendValues(const uint8_t* values, int64_t length, + const uint8_t* valid_bytes) { + RETURN_NOT_OK(Reserve(length)); + + int64_t i = 0; + internal::GenerateBitsUnrolled(raw_data_, length_, length, + [values, &i]() -> bool { return values[i++] != 0; }); + + // this updates length_ + ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length); + return Status::OK(); +} + +Status BooleanBuilder::AppendValues(const uint8_t* values, int64_t length, + const std::vector& is_valid) { + RETURN_NOT_OK(Reserve(length)); + DCHECK_EQ(length, static_cast(is_valid.size())); + + int64_t i = 0; + internal::GenerateBitsUnrolled(raw_data_, length_, length, + [values, &i]() -> bool { return values[i++]; }); + + // this updates length_ + ArrayBuilder::UnsafeAppendToBitmap(is_valid); + return Status::OK(); +} + +Status BooleanBuilder::AppendValues(const std::vector& values, 
+ const std::vector& is_valid) { + return AppendValues(values.data(), static_cast(values.size()), is_valid); +} + +Status BooleanBuilder::AppendValues(const std::vector& values) { + return AppendValues(values.data(), static_cast(values.size())); +} + +Status BooleanBuilder::AppendValues(const std::vector& values, + const std::vector& is_valid) { + const int64_t length = static_cast(values.size()); + RETURN_NOT_OK(Reserve(length)); + DCHECK_EQ(length, static_cast(is_valid.size())); + + int64_t i = 0; + internal::GenerateBitsUnrolled(raw_data_, length_, length, + [&values, &i]() -> bool { return values[i++]; }); + + // this updates length_ + ArrayBuilder::UnsafeAppendToBitmap(is_valid); + return Status::OK(); +} + +Status BooleanBuilder::AppendValues(const std::vector& values) { + const int64_t length = static_cast(values.size()); + RETURN_NOT_OK(Reserve(length)); + + int64_t i = 0; + internal::GenerateBitsUnrolled(raw_data_, length_, length, + [&values, &i]() -> bool { return values[i++]; }); + + // this updates length_ + ArrayBuilder::UnsafeSetNotNull(length); + return Status::OK(); +} + +} // namespace arrow diff --git a/cpp/src/arrow/array/builder_primitive.h b/cpp/src/arrow/array/builder_primitive.h new file mode 100644 index 0000000000000..d17a13013ceae --- /dev/null +++ b/cpp/src/arrow/array/builder_primitive.h @@ -0,0 +1,412 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
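BooleanBuilder bit-packs both the values and the validity bitmap; the `GenerateBitsUnrolled` loops in the implementation above translate byte or `bool` sequences into the packed form in one pass. A short usage sketch of the bulk path:

```cpp
#include <memory>
#include <vector>

#include "arrow/builder.h"

arrow::Status BoolExample(std::shared_ptr<arrow::Array>* out) {
  arrow::BooleanBuilder builder;
  std::vector<bool> values = {true, false, true};
  std::vector<bool> is_valid = {true, true, false};  // third slot is null
  ARROW_RETURN_NOT_OK(builder.AppendValues(values, is_valid));
  return builder.Finish(out);  // boolean array: [true, false, null]
}
```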
+ +#pragma once + +#include +#include +#include + +#include "arrow/array/builder_base.h" +#include "arrow/type.h" + +namespace arrow { + +class ARROW_EXPORT NullBuilder : public ArrayBuilder { + public: + explicit NullBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT) + : ArrayBuilder(null(), pool) {} + + Status AppendNull() { + ++null_count_; + ++length_; + return Status::OK(); + } + + Status Append(std::nullptr_t value) { return AppendNull(); } + + Status FinishInternal(std::shared_ptr* out) override; +}; + +template +class ARROW_EXPORT PrimitiveBuilder : public ArrayBuilder { + public: + using value_type = typename Type::c_type; + + explicit PrimitiveBuilder(const std::shared_ptr& type, MemoryPool* pool) + : ArrayBuilder(type, pool), data_(NULLPTR), raw_data_(NULLPTR) {} + + using ArrayBuilder::Advance; + + /// Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory + /// The memory at the corresponding data slot is set to 0 to prevent + /// uninitialized memory access + Status AppendNulls(const uint8_t* valid_bytes, int64_t length) { + ARROW_RETURN_NOT_OK(Reserve(length)); + memset(raw_data_ + length_, 0, + static_cast(TypeTraits::bytes_required(length))); + UnsafeAppendToBitmap(valid_bytes, length); + return Status::OK(); + } + + /// \brief Append a single null element + Status AppendNull() { + ARROW_RETURN_NOT_OK(Reserve(1)); + memset(raw_data_ + length_, 0, sizeof(value_type)); + UnsafeAppendToBitmap(false); + return Status::OK(); + } + + value_type GetValue(int64_t index) const { + return reinterpret_cast(data_->data())[index]; + } + + /// \brief Append a sequence of elements in one shot + /// \param[in] values a contiguous C array of values + /// \param[in] length the number of values to append + /// \param[in] valid_bytes an optional sequence of bytes where non-zero + /// indicates a valid (non-null) value + /// \return Status + Status AppendValues(const value_type* values, int64_t length, + const uint8_t* valid_bytes = NULLPTR); + + /// \brief Append a sequence of elements in one shot + /// \param[in] values a contiguous C array of values + /// \param[in] length the number of values to append + /// \param[in] is_valid an std::vector indicating valid (1) or null + /// (0). Equal in length to values + /// \return Status + Status AppendValues(const value_type* values, int64_t length, + const std::vector& is_valid); + + /// \brief Append a sequence of elements in one shot + /// \param[in] values a std::vector of values + /// \param[in] is_valid an std::vector indicating valid (1) or null + /// (0). 
Equal in length to values
+  /// \return Status
+  Status AppendValues(const std::vector<value_type>& values,
+                      const std::vector<bool>& is_valid);
+
+  /// \brief Append a sequence of elements in one shot
+  /// \param[in] values a std::vector of values
+  /// \return Status
+  Status AppendValues(const std::vector<value_type>& values);
+
+  /// \brief Append a sequence of elements in one shot
+  /// \param[in] values_begin InputIterator to the beginning of the values
+  /// \param[in] values_end InputIterator pointing to the end of the values
+  /// \return Status
+  template <typename ValuesIter>
+  Status AppendValues(ValuesIter values_begin, ValuesIter values_end) {
+    int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
+    ARROW_RETURN_NOT_OK(Reserve(length));
+
+    std::copy(values_begin, values_end, raw_data_ + length_);
+
+    // this updates the length_
+    UnsafeSetNotNull(length);
+    return Status::OK();
+  }
+
+  /// \brief Append a sequence of elements in one shot, with a specified nullmap
+  /// \param[in] values_begin InputIterator to the beginning of the values
+  /// \param[in] values_end InputIterator pointing to the end of the values
+  /// \param[in] valid_begin InputIterator with elements indicating valid (1)
+  /// or null (0) values.
+  /// \return Status
+  template <typename ValuesIter, typename ValidIter>
+  typename std::enable_if<!std::is_pointer<ValidIter>::value, Status>::type AppendValues(
+      ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
+    static_assert(!internal::is_null_pointer<ValidIter>::value,
+                  "Don't pass a NULLPTR directly as valid_begin, use the 2-argument "
+                  "version instead");
+    int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
+    ARROW_RETURN_NOT_OK(Reserve(length));
+
+    std::copy(values_begin, values_end, raw_data_ + length_);
+
+    // this updates the length_
+    for (int64_t i = 0; i != length; ++i) {
+      UnsafeAppendToBitmap(*valid_begin);
+      ++valid_begin;
+    }
+    return Status::OK();
+  }
+
+  // Same as above, with a pointer type ValidIter
+  template <typename ValuesIter, typename ValidIter>
+  typename std::enable_if<std::is_pointer<ValidIter>::value, Status>::type AppendValues(
+      ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
+    int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
+    ARROW_RETURN_NOT_OK(Reserve(length));
+
+    std::copy(values_begin, values_end, raw_data_ + length_);
+
+    // this updates the length_
+    if (valid_begin == NULLPTR) {
+      UnsafeSetNotNull(length);
+    } else {
+      for (int64_t i = 0; i != length; ++i) {
+        UnsafeAppendToBitmap(*valid_begin);
+        ++valid_begin;
+      }
+    }
+
+    return Status::OK();
+  }
+
+  Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
+  void Reset() override;
+
+  Status Resize(int64_t capacity) override;
+
+ protected:
+  std::shared_ptr<ResizableBuffer> data_;
+  value_type* raw_data_;
+};
+
+/// Base class for all Builders that emit an Array of a scalar numerical type.
+template <typename T>
+class ARROW_EXPORT NumericBuilder : public PrimitiveBuilder<T> {
+ public:
+  using typename PrimitiveBuilder<T>::value_type;
+  using PrimitiveBuilder<T>::PrimitiveBuilder;
+
+  template <typename T1 = T>
+  explicit NumericBuilder(
+      typename std::enable_if<TypeTraits<T1>::is_parameter_free, MemoryPool*>::type pool
+          ARROW_MEMORY_POOL_DEFAULT)
+      : PrimitiveBuilder<T1>(TypeTraits<T1>::type_singleton(), pool) {}
+
+  using ArrayBuilder::UnsafeAppendNull;
+  using ArrayBuilder::UnsafeAppendToBitmap;
+  using PrimitiveBuilder<T>::AppendValues;
+  using PrimitiveBuilder<T>::Resize;
+  using PrimitiveBuilder<T>::Reserve;
+
+  /// Append a single scalar and increase the size if necessary.
+  Status Append(const value_type val) {
+    ARROW_RETURN_NOT_OK(ArrayBuilder::Reserve(1));
+    UnsafeAppend(val);
+    return Status::OK();
+  }
+
+  /// Append a single scalar under the assumption that the underlying Buffer is
+  /// large enough.
+  ///
+  /// This method does not capacity-check; make sure to call Reserve
+  /// beforehand.
+  void UnsafeAppend(const value_type val) {
+    raw_data_[length_] = val;
+    UnsafeAppendToBitmap(true);
+  }
+
+ protected:
+  using PrimitiveBuilder<T>::length_;
+  using PrimitiveBuilder<T>::raw_data_;
+};
+
+// Builders
+
+using UInt8Builder = NumericBuilder<UInt8Type>;
+using UInt16Builder = NumericBuilder<UInt16Type>;
+using UInt32Builder = NumericBuilder<UInt32Type>;
+using UInt64Builder = NumericBuilder<UInt64Type>;
+
+using Int8Builder = NumericBuilder<Int8Type>;
+using Int16Builder = NumericBuilder<Int16Type>;
+using Int32Builder = NumericBuilder<Int32Type>;
+using Int64Builder = NumericBuilder<Int64Type>;
+using TimestampBuilder = NumericBuilder<TimestampType>;
+using Time32Builder = NumericBuilder<Time32Type>;
+using Time64Builder = NumericBuilder<Time64Type>;
+using Date32Builder = NumericBuilder<Date32Type>;
+using Date64Builder = NumericBuilder<Date64Type>;
+
+using HalfFloatBuilder = NumericBuilder<HalfFloatType>;
+using FloatBuilder = NumericBuilder<FloatType>;
+using DoubleBuilder = NumericBuilder<DoubleType>;
+
+class ARROW_EXPORT BooleanBuilder : public ArrayBuilder {
+ public:
+  using value_type = bool;
+  explicit BooleanBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT);
+
+  explicit BooleanBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool);
+
+  using ArrayBuilder::Advance;
+  using ArrayBuilder::UnsafeAppendNull;
+
+  /// Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory
+  Status AppendNulls(const uint8_t* valid_bytes, int64_t length) {
+    ARROW_RETURN_NOT_OK(Reserve(length));
+    UnsafeAppendToBitmap(valid_bytes, length);
+
+    return Status::OK();
+  }
+
+  Status AppendNull() {
+    ARROW_RETURN_NOT_OK(Reserve(1));
+    UnsafeAppendToBitmap(false);
+
+    return Status::OK();
+  }
+
+  /// Scalar append
+  Status Append(const bool val) {
+    ARROW_RETURN_NOT_OK(Reserve(1));
+    UnsafeAppend(val);
+    return Status::OK();
+  }
+
+  Status Append(const uint8_t val) { return Append(val != 0); }
+
+  /// Scalar append, without checking for capacity
+  void UnsafeAppend(const bool val) {
+    if (val) {
+      BitUtil::SetBit(raw_data_, length_);
+    } else {
+      BitUtil::ClearBit(raw_data_, length_);
+    }
+    UnsafeAppendToBitmap(true);
+  }
+
+  void UnsafeAppend(const uint8_t val) { UnsafeAppend(val != 0); }
+
+  /// \brief Append a sequence of elements in one shot
+  /// \param[in] values a contiguous array of bytes (non-zero is 1)
+  /// \param[in] length the number of values to append
+  /// \param[in] valid_bytes an optional sequence of bytes where non-zero
+  /// indicates a valid (non-null) value
+  /// \return Status
+  Status AppendValues(const uint8_t* values, int64_t length,
+                      const uint8_t* valid_bytes = NULLPTR);
+
+  /// \brief Append a sequence of elements in one shot
+  /// \param[in] values a contiguous C array of values
+  /// \param[in] length the number of values to append
+  /// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
+  /// (0). Equal in length to values
+  /// \return Status
+  Status AppendValues(const uint8_t* values, int64_t length,
+                      const std::vector<bool>& is_valid);
+
+  /// \brief Append a sequence of elements in one shot
+  /// \param[in] values a std::vector of bytes
+  /// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
+  /// (0). Equal in length to values
+  /// \return Status
+  Status AppendValues(const std::vector<uint8_t>& values,
+                      const std::vector<bool>& is_valid);
+
+  /// \brief Append a sequence of elements in one shot
+  /// \param[in] values a std::vector of bytes
+  /// \return Status
+  Status AppendValues(const std::vector<uint8_t>& values);
+
+  /// \brief Append a sequence of elements in one shot
+  /// \param[in] values an std::vector<bool> indicating true (1) or false
+  /// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
+  /// (0). Equal in length to values
+  /// \return Status
+  Status AppendValues(const std::vector<bool>& values, const std::vector<bool>& is_valid);
+
+  /// \brief Append a sequence of elements in one shot
+  /// \param[in] values an std::vector<bool> indicating true (1) or false
+  /// \return Status
+  Status AppendValues(const std::vector<bool>& values);
+
+  /// \brief Append a sequence of elements in one shot
+  /// \param[in] values_begin InputIterator to the beginning of the values
+  /// \param[in] values_end InputIterator pointing to the end of the values
+  /// \return Status
+  template <typename ValuesIter>
+  Status AppendValues(ValuesIter values_begin, ValuesIter values_end) {
+    int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
+    ARROW_RETURN_NOT_OK(Reserve(length));
+    auto iter = values_begin;
+    internal::GenerateBitsUnrolled(raw_data_, length_, length,
+                                   [&iter]() -> bool { return *(iter++); });
+
+    // this updates length_
+    UnsafeSetNotNull(length);
+    return Status::OK();
+  }
+
+  /// \brief Append a sequence of elements in one shot, with a specified nullmap
+  /// \param[in] values_begin InputIterator to the beginning of the values
+  /// \param[in] values_end InputIterator pointing to the end of the values
+  /// \param[in] valid_begin InputIterator with elements indicating valid (1)
+  /// or null (0) values
+  /// \return Status
+  template <typename ValuesIter, typename ValidIter>
+  typename std::enable_if<!std::is_pointer<ValidIter>::value, Status>::type AppendValues(
+      ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
+    static_assert(!internal::is_null_pointer<ValidIter>::value,
+                  "Don't pass a NULLPTR directly as valid_begin, use the 2-argument "
+                  "version instead");
+    int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
+    ARROW_RETURN_NOT_OK(Reserve(length));
+
+    auto iter = values_begin;
+    internal::GenerateBitsUnrolled(raw_data_, length_, length,
+                                   [&iter]() -> bool { return *(iter++); });
+
+    // this updates length_
+    for (int64_t i = 0; i != length; ++i) {
+      ArrayBuilder::UnsafeAppendToBitmap(*valid_begin);
+      ++valid_begin;
+    }
+    return Status::OK();
+  }
+
+  // Same as above, for a pointer type ValidIter
+  template <typename ValuesIter, typename ValidIter>
+  typename std::enable_if<std::is_pointer<ValidIter>::value, Status>::type AppendValues(
+      ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
+    int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
+    ARROW_RETURN_NOT_OK(Reserve(length));
+
+    auto iter = values_begin;
+    internal::GenerateBitsUnrolled(raw_data_, length_, length,
+                                   [&iter]() -> bool { return *(iter++); });
+
+    // this updates the length_
+    if (valid_begin == NULLPTR) {
+      UnsafeSetNotNull(length);
+    } else {
+      for (int64_t i = 0; i != length; ++i) {
+        ArrayBuilder::UnsafeAppendToBitmap(*valid_begin);
+        ++valid_begin;
+      }
+    }
+
+    return Status::OK();
+  }
+
+  Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
+  void Reset() override;
+  Status Resize(int64_t capacity) override;
+
+ protected:
+  std::shared_ptr<ResizableBuffer> data_;
+  uint8_t* raw_data_;
+};
+
+}  // namespace arrow
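For orientation, here is a minimal usage sketch of the builders declared above (editorial illustration, not part of the patch; it assumes the public Arrow headers and abbreviates error handling):

#include <memory>
#include <vector>
#include "arrow/api.h"

arrow::Status BuildExample(std::shared_ptr<arrow::Array>* out) {
  arrow::Int64Builder builder;
  // Scalar append: reserves capacity as needed.
  ARROW_RETURN_NOT_OK(builder.Append(42));
  // Bulk append with a validity vector: appends 1, null, 3.
  ARROW_RETURN_NOT_OK(builder.AppendValues({1, 0, 3}, {true, false, true}));
  // Iterator overload; the enable_if overloads above select the pointer or
  // non-pointer valid_begin implementation at compile time.
  std::vector<int64_t> more = {4, 5, 6};
  ARROW_RETURN_NOT_OK(builder.AppendValues(more.begin(), more.end()));
  return builder.Finish(out);
}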
diff --git a/cpp/src/arrow/array/builder_union.cc b/cpp/src/arrow/array/builder_union.cc
new file mode 100644
index 0000000000000..f51b7d7f0203a
--- /dev/null
+++ b/cpp/src/arrow/array/builder_union.cc
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/array/builder_union.h"
+
+#include <utility>
+
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+DenseUnionBuilder::DenseUnionBuilder(MemoryPool* pool,
+                                     const std::shared_ptr<DataType>& type)
+    : ArrayBuilder(type, pool), types_builder_(pool), offsets_builder_(pool) {}
+
+Status DenseUnionBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
+  std::shared_ptr<Buffer> types;
+  RETURN_NOT_OK(types_builder_.Finish(&types));
+  std::shared_ptr<Buffer> offsets;
+  RETURN_NOT_OK(offsets_builder_.Finish(&offsets));
+
+  std::shared_ptr<Buffer> null_bitmap;
+  RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
+
+  std::vector<std::shared_ptr<Field>> fields;
+  std::vector<std::shared_ptr<ArrayData>> child_data(children_.size());
+  std::vector<int8_t> type_ids;
+  for (size_t i = 0; i < children_.size(); ++i) {
+    std::shared_ptr<ArrayData> data;
+    RETURN_NOT_OK(children_[i]->FinishInternal(&data));
+    child_data[i] = data;
+    fields.push_back(field(field_names_[i], children_[i]->type()));
+    type_ids.push_back(static_cast<int8_t>(i));
+  }
+
+  // If the type has not been specified in the constructor, infer it
+  if (!type_) {
+    type_ = union_(fields, type_ids, UnionMode::DENSE);
+  }
+
+  *out = ArrayData::Make(type_, length(), {null_bitmap, types, offsets}, null_count_);
+  (*out)->child_data = std::move(child_data);
+  return Status::OK();
+}
+
+}  // namespace arrow
diff --git a/cpp/src/arrow/array/builder_union.h b/cpp/src/arrow/array/builder_union.h
new file mode 100644
index 0000000000000..2ababc7d96ea2
--- /dev/null
+++ b/cpp/src/arrow/array/builder_union.h
@@ -0,0 +1,89 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/array/builder_base.h"
+#include "arrow/buffer-builder.h"
+
+namespace arrow {
+
+/// \class DenseUnionBuilder
+///
+/// You need to call AppendChild for each of the children builders you want
+/// to use. The function will return an int8_t, which is the type tag
+/// associated with that child. You can then call Append with that tag
+/// (followed by an append on the child builder) to add elements to
+/// the union array.
+///
+/// You can either specify the type when the UnionBuilder is constructed
+/// or let the UnionBuilder infer the type at runtime (by omitting the
+/// type argument from the constructor).
+///
+/// This API is EXPERIMENTAL.
+class ARROW_EXPORT DenseUnionBuilder : public ArrayBuilder {
+ public:
+  /// Use this constructor to incrementally build the union array along
+  /// with types, offsets, and null bitmap.
+  explicit DenseUnionBuilder(MemoryPool* pool,
+                             const std::shared_ptr<DataType>& type = NULLPTR);
+
+  Status AppendNull() {
+    ARROW_RETURN_NOT_OK(types_builder_.Append(0));
+    ARROW_RETURN_NOT_OK(offsets_builder_.Append(0));
+    return AppendToBitmap(false);
+  }
+
+  /// \brief Append an element to the UnionArray. This must be followed
+  /// by an append to the appropriate child builder.
+  /// \param[in] type index of the child the value will be appended
+  /// \param[in] offset offset of the value in that child
+  Status Append(int8_t type, int32_t offset) {
+    ARROW_RETURN_NOT_OK(types_builder_.Append(type));
+    ARROW_RETURN_NOT_OK(offsets_builder_.Append(offset));
+    return AppendToBitmap(true);
+  }
+
+  Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
+
+  /// \brief Make a new child builder available to the UnionArray
+  ///
+  /// \param[in] child the child builder
+  /// \param[in] field_name the name of the field in the union array type
+  /// if type inference is used
+  /// \return child index, which is the "type" argument that needs
+  /// to be passed to the "Append" method to add a new element to
+  /// the union array.
+  int8_t AppendChild(const std::shared_ptr<ArrayBuilder>& child,
+                     const std::string& field_name = "") {
+    children_.push_back(child);
+    field_names_.push_back(field_name);
+    return static_cast<int8_t>(children_.size() - 1);
+  }
+
+ private:
+  TypedBufferBuilder<int8_t> types_builder_;
+  TypedBufferBuilder<int32_t> offsets_builder_;
+  std::vector<std::string> field_names_;
+};
+
+}  // namespace arrow
diff --git a/cpp/src/arrow/buffer-builder.h b/cpp/src/arrow/buffer-builder.h
new file mode 100644
index 0000000000000..b27fbd838f226
--- /dev/null
+++ b/cpp/src/arrow/buffer-builder.h
@@ -0,0 +1,349 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
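To make the call protocol concrete, here is a hedged usage sketch of the DenseUnionBuilder declared above (editorial illustration, not part of the patch; field names and values are invented, status checks are abbreviated):

#include <memory>
#include "arrow/api.h"
#include "arrow/array/builder_union.h"

arrow::Status BuildUnionExample(std::shared_ptr<arrow::Array>* out) {
  auto pool = arrow::default_memory_pool();
  arrow::DenseUnionBuilder builder(pool);  // omit the type: inferred at Finish
  auto ints = std::make_shared<arrow::Int64Builder>(pool);
  auto strs = std::make_shared<arrow::StringBuilder>(pool);
  const int8_t int_tag = builder.AppendChild(ints, "i");
  const int8_t str_tag = builder.AppendChild(strs, "s");
  // Each union slot records a type tag plus an offset into that child.
  ARROW_RETURN_NOT_OK(builder.Append(int_tag, /*offset=*/0));
  ARROW_RETURN_NOT_OK(ints->Append(11));
  ARROW_RETURN_NOT_OK(builder.Append(str_tag, /*offset=*/0));
  ARROW_RETURN_NOT_OK(strs->Append("hello"));
  return builder.Finish(out);
}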
+ +#ifndef ARROW_BUFFER_BUILDER_H +#define ARROW_BUFFER_BUILDER_H + +#include +#include +#include +#include +#include +#include + +#include "arrow/buffer.h" +#include "arrow/status.h" +#include "arrow/util/bit-util.h" +#include "arrow/util/macros.h" +#include "arrow/util/visibility.h" + +namespace arrow { + +// ---------------------------------------------------------------------- +// Buffer builder classes + +/// \class BufferBuilder +/// \brief A class for incrementally building a contiguous chunk of in-memory +/// data +class ARROW_EXPORT BufferBuilder { + public: + explicit BufferBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT) + : pool_(pool), data_(NULLPTR), capacity_(0), size_(0) {} + + /// \brief Resize the buffer to the nearest multiple of 64 bytes + /// + /// \param new_capacity the new capacity of the of the builder. Will be + /// rounded up to a multiple of 64 bytes for padding \param shrink_to_fit if + /// new capacity is smaller than the existing size, reallocate internal + /// buffer. Set to false to avoid reallocations when shrinking the builder. + /// \return Status + Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) { + // Resize(0) is a no-op + if (new_capacity == 0) { + return Status::OK(); + } + int64_t old_capacity = capacity_; + + if (buffer_ == NULLPTR) { + ARROW_RETURN_NOT_OK(AllocateResizableBuffer(pool_, new_capacity, &buffer_)); + } else { + ARROW_RETURN_NOT_OK(buffer_->Resize(new_capacity, shrink_to_fit)); + } + capacity_ = buffer_->capacity(); + data_ = buffer_->mutable_data(); + if (capacity_ > old_capacity) { + memset(data_ + old_capacity, 0, capacity_ - old_capacity); + } + return Status::OK(); + } + + /// \brief Ensure that builder can accommodate the additional number of bytes + /// without the need to perform allocations + /// + /// \param[in] additional_bytes number of additional bytes to make space for + /// \param[in] grow_by_factor if true, round up allocations using the + /// strategy in BufferBuilder::GrowByFactor + /// \return Status + Status Reserve(const int64_t additional_bytes, bool grow_by_factor = false) { + auto min_capacity = size_ + additional_bytes; + if (min_capacity <= capacity_) return Status::OK(); + if (grow_by_factor) { + min_capacity = GrowByFactor(min_capacity); + } + return Resize(min_capacity, false); + } + + /// \brief Return a capacity expanded by a growth factor of 2 + static int64_t GrowByFactor(const int64_t min_capacity) { + // If the capacity was not already a multiple of 2, do so here + // TODO(emkornfield) doubling isn't great default allocation practice + // see https://github.com/facebook/folly/blob/master/folly/docs/FBVector.md + // for discussion + return BitUtil::NextPower2(min_capacity); + } + + /// \brief Append the given data to the buffer + /// + /// The buffer is automatically expanded if necessary. + Status Append(const void* data, const int64_t length) { + if (ARROW_PREDICT_FALSE(size_ + length > capacity_)) { + ARROW_RETURN_NOT_OK(Resize(GrowByFactor(size_ + length), false)); + } + UnsafeAppend(data, length); + return Status::OK(); + } + + /// \brief Append copies of a value to the buffer + /// + /// The buffer is automatically expanded if necessary. + Status Append(const int64_t num_copies, uint8_t value) { + ARROW_RETURN_NOT_OK(Reserve(num_copies, true)); + UnsafeAppend(num_copies, value); + return Status::OK(); + } + + /// \brief Append the given data to the buffer + /// + /// The buffer is automatically expanded if necessary. 
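As a usage note for the class above (editorial sketch, not part of the patch): the intended pattern is a single Reserve followed by unchecked appends, with grow_by_factor giving amortized constant-time growth through GrowByFactor:

#include <cstdint>
#include <memory>
#include "arrow/buffer-builder.h"
#include "arrow/status.h"

arrow::Status FillBuffer(const uint8_t* data, int64_t nbytes,
                         std::shared_ptr<arrow::Buffer>* out) {
  arrow::BufferBuilder builder;
  // Rounding the allocation up to the next power of two means repeated
  // appends reallocate O(log n) times instead of O(n).
  ARROW_RETURN_NOT_OK(builder.Reserve(nbytes, /*grow_by_factor=*/true));
  builder.UnsafeAppend(data, nbytes);  // no capacity check; Reserve was called
  return builder.Finish(out);
}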
+ template + Status Append(const std::array& data) { + constexpr auto nbytes = static_cast(NBYTES); + ARROW_RETURN_NOT_OK(Reserve(NBYTES, true)); + std::copy(data.cbegin(), data.cend(), data_ + size_); + size_ += nbytes; + return Status::OK(); + } + + // Advance pointer and zero out memory + Status Advance(const int64_t length) { return Append(length, 0); } + + // Unsafe methods don't check existing size + void UnsafeAppend(const void* data, const int64_t length) { + memcpy(data_ + size_, data, static_cast(length)); + size_ += length; + } + + void UnsafeAppend(const int64_t num_copies, uint8_t value) { + memset(data_ + size_, value, static_cast(num_copies)); + size_ += num_copies; + } + + /// \brief Return result of builder as a Buffer object. + /// + /// The builder is reset and can be reused afterwards. + /// + /// \param[out] out the finalized Buffer object + /// \param shrink_to_fit if the buffer size is smaller than its capacity, + /// reallocate to fit more tightly in memory. Set to false to avoid + /// a reallocation, at the expense of potentially more memory consumption. + /// \return Status + Status Finish(std::shared_ptr* out, bool shrink_to_fit = true) { + ARROW_RETURN_NOT_OK(Resize(size_, shrink_to_fit)); + *out = buffer_; + Reset(); + return Status::OK(); + } + + void Reset() { + buffer_ = NULLPTR; + capacity_ = size_ = 0; + } + + int64_t capacity() const { return capacity_; } + int64_t length() const { return size_; } + const uint8_t* data() const { return data_; } + uint8_t* mutable_data() { return data_; } + + private: + std::shared_ptr buffer_; + MemoryPool* pool_; + uint8_t* data_; + int64_t capacity_; + int64_t size_; +}; + +template +class TypedBufferBuilder; + +/// \brief A BufferBuilder for building a buffer of arithmetic elements +template +class TypedBufferBuilder::value>::type> { + public: + explicit TypedBufferBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT) + : bytes_builder_(pool) {} + + Status Append(T value) { + return bytes_builder_.Append(reinterpret_cast(&value), sizeof(T)); + } + + Status Append(const T* values, int64_t num_elements) { + return bytes_builder_.Append(reinterpret_cast(values), + num_elements * sizeof(T)); + } + + Status Append(const int64_t num_copies, T value) { + ARROW_RETURN_NOT_OK( + Resize(BufferBuilder::GrowByFactor(num_copies + length()), false)); + UnsafeAppend(num_copies, value); + return Status::OK(); + } + + void UnsafeAppend(T value) { + bytes_builder_.UnsafeAppend(reinterpret_cast(&value), sizeof(T)); + } + + void UnsafeAppend(const T* values, int64_t num_elements) { + bytes_builder_.UnsafeAppend(reinterpret_cast(values), + num_elements * sizeof(T)); + } + + void UnsafeAppend(const int64_t num_copies, T value) { + auto data = mutable_data() + length(); + bytes_builder_.UnsafeAppend(num_copies * sizeof(T), 0); + for (const auto end = data + num_copies; data != end; ++data) { + *data = value; + } + } + + Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) { + return bytes_builder_.Resize(new_capacity * sizeof(T), shrink_to_fit); + } + + Status Reserve(const int64_t additional_elements) { + return bytes_builder_.Reserve(additional_elements * sizeof(T)); + } + + Status Advance(const int64_t length) { + return bytes_builder_.Advance(length * sizeof(T)); + } + + Status Finish(std::shared_ptr* out, bool shrink_to_fit = true) { + return bytes_builder_.Finish(out, shrink_to_fit); + } + + void Reset() { bytes_builder_.Reset(); } + + int64_t length() const { return bytes_builder_.length() / sizeof(T); } + int64_t 
capacity() const { return bytes_builder_.capacity() / sizeof(T); } + const T* data() const { return reinterpret_cast(bytes_builder_.data()); } + T* mutable_data() { return reinterpret_cast(bytes_builder_.mutable_data()); } + + private: + BufferBuilder bytes_builder_; +}; + +/// \brief A BufferBuilder for building a buffer containing a bitmap +template <> +class TypedBufferBuilder { + public: + explicit TypedBufferBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT) + : bytes_builder_(pool) {} + + Status Append(bool value) { + ARROW_RETURN_NOT_OK(ResizeWithGrowthFactor(bit_length_ + 1)); + UnsafeAppend(value); + return Status::OK(); + } + + Status Append(const uint8_t* valid_bytes, int64_t num_elements) { + ARROW_RETURN_NOT_OK(ResizeWithGrowthFactor(bit_length_ + num_elements)); + UnsafeAppend(valid_bytes, num_elements); + return Status::OK(); + } + + Status Append(const int64_t num_copies, bool value) { + ARROW_RETURN_NOT_OK(ResizeWithGrowthFactor(bit_length_ + num_copies)); + UnsafeAppend(num_copies, value); + return Status::OK(); + } + + void UnsafeAppend(bool value) { + BitUtil::SetBitTo(mutable_data(), bit_length_, value); + if (!value) { + ++false_count_; + } + ++bit_length_; + } + + void UnsafeAppend(const uint8_t* bytes, int64_t num_elements) { + if (num_elements == 0) return; + int64_t i = 0; + internal::GenerateBitsUnrolled(mutable_data(), bit_length_, num_elements, [&] { + bool value = bytes[i++]; + if (!value) ++false_count_; + return value; + }); + bit_length_ += num_elements; + } + + void UnsafeAppend(const int64_t num_copies, bool value) { + BitUtil::SetBitsTo(mutable_data(), bit_length_, num_copies, value); + if (!value) { + false_count_ += num_copies; + } + bit_length_ += num_copies; + } + + Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) { + const int64_t old_byte_capacity = bytes_builder_.capacity(); + const int64_t new_byte_capacity = BitUtil::BytesForBits(new_capacity); + ARROW_RETURN_NOT_OK(bytes_builder_.Resize(new_byte_capacity, shrink_to_fit)); + if (new_byte_capacity > old_byte_capacity) { + memset(mutable_data() + old_byte_capacity, 0, + static_cast(new_byte_capacity - old_byte_capacity)); + } + return Status::OK(); + } + + Status Reserve(const int64_t additional_elements) { + return Resize(bit_length_ + additional_elements, false); + } + + Status Advance(const int64_t length) { + bit_length_ += length; + false_count_ += length; + return ResizeWithGrowthFactor(bit_length_); + } + + Status Finish(std::shared_ptr* out, bool shrink_to_fit = true) { + bit_length_ = false_count_ = 0; + return bytes_builder_.Finish(out, shrink_to_fit); + } + + void Reset() { + bytes_builder_.Reset(); + bit_length_ = false_count_ = 0; + } + + int64_t length() const { return bit_length_; } + int64_t capacity() const { return bytes_builder_.capacity() * 8; } + const uint8_t* data() const { return bytes_builder_.data(); } + uint8_t* mutable_data() { return bytes_builder_.mutable_data(); } + int64_t false_count() const { return false_count_; } + + private: + Status ResizeWithGrowthFactor(const int64_t min_capacity) { + return Resize(BufferBuilder::GrowByFactor(min_capacity), false); + } + BufferBuilder bytes_builder_; + int64_t bit_length_ = 0; + int64_t false_count_ = 0; +}; + +} // namespace arrow + +#endif // ARROW_BUFFER_BUILDER_H diff --git a/cpp/src/arrow/buffer-test.cc b/cpp/src/arrow/buffer-test.cc index 4d16f7f9c277d..0154892d12b46 100644 --- a/cpp/src/arrow/buffer-test.cc +++ b/cpp/src/arrow/buffer-test.cc @@ -26,6 +26,7 @@ #include +#include 
"arrow/buffer-builder.h" #include "arrow/buffer.h" #include "arrow/memory_pool.h" #include "arrow/status.h" @@ -35,6 +36,22 @@ using std::string; namespace arrow { +TEST(TestAllocate, Bitmap) { + std::shared_ptr new_buffer; + EXPECT_OK(AllocateBitmap(default_memory_pool(), 100, &new_buffer)); + EXPECT_GE(new_buffer->size(), 13); + EXPECT_EQ(new_buffer->capacity() % 8, 0); +} + +TEST(TestAllocate, EmptyBitmap) { + std::shared_ptr new_buffer; + EXPECT_OK(AllocateEmptyBitmap(default_memory_pool(), 100, &new_buffer)); + EXPECT_EQ(new_buffer->size(), 13); + EXPECT_EQ(new_buffer->capacity() % 8, 0); + EXPECT_TRUE(std::all_of(new_buffer->data(), new_buffer->data() + new_buffer->capacity(), + [](int8_t byte) { return byte == 0; })); +} + TEST(TestBuffer, FromStdString) { std::string val = "hello, world"; @@ -176,6 +193,65 @@ TEST(TestBuffer, SliceMutableBuffer) { ASSERT_TRUE(slice->Equals(expected)); } +template +void TestZeroSizeAllocateBuffer(MemoryPool* pool, AllocateFunction&& allocate_func) { + auto allocated_bytes = pool->bytes_allocated(); + { + std::shared_ptr buffer; + + ASSERT_OK(allocate_func(pool, 0, &buffer)); + ASSERT_EQ(buffer->size(), 0); + // Even 0-sized buffers should not have a null data pointer + ASSERT_NE(buffer->data(), nullptr); + ASSERT_EQ(buffer->mutable_data(), buffer->data()); + + ASSERT_GE(pool->bytes_allocated(), allocated_bytes); + } + ASSERT_EQ(pool->bytes_allocated(), allocated_bytes); +} + +TEST(TestAllocateBuffer, ZeroSize) { + MemoryPool* pool = default_memory_pool(); + auto allocate_func = [](MemoryPool* pool, int64_t size, std::shared_ptr* out) { + return AllocateBuffer(pool, size, out); + }; + TestZeroSizeAllocateBuffer(pool, allocate_func); +} + +TEST(TestAllocateResizableBuffer, ZeroSize) { + MemoryPool* pool = default_memory_pool(); + auto allocate_func = [](MemoryPool* pool, int64_t size, std::shared_ptr* out) { + std::shared_ptr res; + RETURN_NOT_OK(AllocateResizableBuffer(pool, size, &res)); + *out = res; + return Status::OK(); + }; + TestZeroSizeAllocateBuffer(pool, allocate_func); +} + +TEST(TestAllocateResizableBuffer, ZeroResize) { + MemoryPool* pool = default_memory_pool(); + auto allocated_bytes = pool->bytes_allocated(); + { + std::shared_ptr buffer; + + ASSERT_OK(AllocateResizableBuffer(pool, 1000, &buffer)); + ASSERT_EQ(buffer->size(), 1000); + ASSERT_NE(buffer->data(), nullptr); + ASSERT_EQ(buffer->mutable_data(), buffer->data()); + + ASSERT_GE(pool->bytes_allocated(), allocated_bytes + 1000); + + ASSERT_OK(buffer->Resize(0)); + ASSERT_NE(buffer->data(), nullptr); + ASSERT_EQ(buffer->mutable_data(), buffer->data()); + + ASSERT_GE(pool->bytes_allocated(), allocated_bytes); + ASSERT_LT(pool->bytes_allocated(), allocated_bytes + 1000); + } + ASSERT_EQ(pool->bytes_allocated(), allocated_bytes); +} + TEST(TestBufferBuilder, ResizeReserve) { const std::string data = "some data"; auto data_ptr = data.c_str(); @@ -201,6 +277,98 @@ TEST(TestBufferBuilder, ResizeReserve) { ASSERT_EQ(128, builder.capacity()); } +template +class TypedTestBufferBuilder : public ::testing::Test {}; + +using BufferBuilderElements = ::testing::Types; + +TYPED_TEST_CASE(TypedTestBufferBuilder, BufferBuilderElements); + +TYPED_TEST(TypedTestBufferBuilder, BasicTypedBufferBuilderUsage) { + TypedBufferBuilder builder; + + ASSERT_OK(builder.Append(static_cast(0))); + ASSERT_EQ(builder.length(), 1); + ASSERT_EQ(builder.capacity(), 64 / sizeof(TypeParam)); + + constexpr int nvalues = 4; + TypeParam values[nvalues]; + for (int i = 0; i != nvalues; ++i) { + values[i] = static_cast(i); 
+ } + ASSERT_OK(builder.Append(values, nvalues)); + ASSERT_EQ(builder.length(), nvalues + 1); + + std::shared_ptr built; + ASSERT_OK(builder.Finish(&built)); + + auto data = reinterpret_cast(built->data()); + ASSERT_EQ(data[0], static_cast(0)); + for (auto value : values) { + ++data; + ASSERT_EQ(*data, value); + } +} + +TYPED_TEST(TypedTestBufferBuilder, AppendCopies) { + TypedBufferBuilder builder; + + ASSERT_OK(builder.Append(13, static_cast(1))); + ASSERT_OK(builder.Append(17, static_cast(0))); + ASSERT_EQ(builder.length(), 13 + 17); + + std::shared_ptr built; + ASSERT_OK(builder.Finish(&built)); + + auto data = reinterpret_cast(built->data()); + for (int i = 0; i != 13 + 17; ++i, ++data) { + ASSERT_EQ(*data, static_cast(i < 13)) << "index = " << i; + } +} + +TEST(TestBufferBuilder, BasicBoolBufferBuilderUsage) { + TypedBufferBuilder builder; + + ASSERT_OK(builder.Append(false)); + ASSERT_EQ(builder.length(), 1); + ASSERT_EQ(builder.capacity(), 64 * 8); + + constexpr int nvalues = 4; + uint8_t values[nvalues]; + for (int i = 0; i != nvalues; ++i) { + values[i] = static_cast(i); + } + ASSERT_OK(builder.Append(values, nvalues)); + ASSERT_EQ(builder.length(), nvalues + 1); + + ASSERT_EQ(builder.false_count(), 2); + + std::shared_ptr built; + ASSERT_OK(builder.Finish(&built)); + + ASSERT_EQ(BitUtil::GetBit(built->data(), 0), false); + for (int i = 0; i != nvalues; ++i) { + ASSERT_EQ(BitUtil::GetBit(built->data(), i + 1), static_cast(values[i])); + } +} + +TEST(TestBufferBuilder, BoolBufferBuilderAppendCopies) { + TypedBufferBuilder builder; + + ASSERT_OK(builder.Append(13, true)); + ASSERT_OK(builder.Append(17, false)); + ASSERT_EQ(builder.length(), 13 + 17); + ASSERT_EQ(builder.capacity(), 64 * 8); + ASSERT_EQ(builder.false_count(), 17); + + std::shared_ptr built; + ASSERT_OK(builder.Finish(&built)); + + for (int i = 0; i != 13 + 17; ++i) { + EXPECT_EQ(BitUtil::GetBit(built->data(), i), i < 13) << "index = " << i; + } +} + template class TypedTestBuffer : public ::testing::Test {}; diff --git a/cpp/src/arrow/buffer.cc b/cpp/src/arrow/buffer.cc index 01bb0c34968d3..8f05912b80417 100644 --- a/cpp/src/arrow/buffer.cc +++ b/cpp/src/arrow/buffer.cc @@ -126,25 +126,18 @@ class PoolBuffer : public ResizableBuffer { } Status Resize(const int64_t new_size, bool shrink_to_fit = true) override { - if (!shrink_to_fit || (new_size > size_)) { - RETURN_NOT_OK(Reserve(new_size)); - } else { - // Buffer is not growing, so shrink to the requested size without + if (mutable_data_ && shrink_to_fit && new_size <= size_) { + // Buffer is non-null and is not growing, so shrink to the requested size without // excess space. int64_t new_capacity = BitUtil::RoundUpToMultipleOf64(new_size); if (capacity_ != new_capacity) { // Buffer hasn't got yet the requested size. 
- if (new_size == 0) { - pool_->Free(mutable_data_, capacity_); - capacity_ = 0; - mutable_data_ = nullptr; - data_ = nullptr; - } else { - RETURN_NOT_OK(pool_->Reallocate(capacity_, new_capacity, &mutable_data_)); - data_ = mutable_data_; - capacity_ = new_capacity; - } + RETURN_NOT_OK(pool_->Reallocate(capacity_, new_capacity, &mutable_data_)); + data_ = mutable_data_; + capacity_ = new_capacity; } + } else { + RETURN_NOT_OK(Reserve(new_size)); } size_ = new_size; @@ -219,9 +212,13 @@ Status AllocateResizableBuffer(const int64_t size, return AllocateResizableBuffer(default_memory_pool(), size, out); } +Status AllocateBitmap(MemoryPool* pool, int64_t length, std::shared_ptr* out) { + return AllocateBuffer(pool, BitUtil::BytesForBits(length), out); +} + Status AllocateEmptyBitmap(MemoryPool* pool, int64_t length, std::shared_ptr* out) { - RETURN_NOT_OK(AllocateBuffer(pool, BitUtil::BytesForBits(length), out)); + RETURN_NOT_OK(AllocateBitmap(pool, length, out)); memset((*out)->mutable_data(), 0, static_cast((*out)->size())); return Status::OK(); } diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h index 66c131413c2d3..306e677619fd7 100644 --- a/cpp/src/arrow/buffer.h +++ b/cpp/src/arrow/buffer.h @@ -19,7 +19,6 @@ #define ARROW_BUFFER_H #include -#include #include #include #include @@ -29,7 +28,6 @@ #include "arrow/memory_pool.h" #include "arrow/status.h" -#include "arrow/util/bit-util.h" #include "arrow/util/macros.h" #include "arrow/util/visibility.h" @@ -40,13 +38,15 @@ namespace arrow { /// \class Buffer /// \brief Object containing a pointer to a piece of contiguous memory with a -/// particular size. Base class does not own its memory +/// particular size. /// /// Buffers have two related notions of length: size and capacity. Size is /// the number of bytes that might have valid data. Capacity is the number -/// of bytes that where allocated for the buffer in total. +/// of bytes that were allocated for the buffer in total. /// -/// The following invariant is always true: Size < Capacity +/// The Buffer base class does not own its memory, but subclasses often do. +/// +/// The following invariant is always true: Size <= Capacity class ARROW_EXPORT Buffer { public: /// \brief Construct from buffer and size without copying memory @@ -108,7 +108,10 @@ class ARROW_EXPORT Buffer { #ifndef NDEBUG CheckMutable(); #endif - memset(mutable_data_ + size_, 0, static_cast(capacity_ - size_)); + // A zero-capacity buffer can have a null data pointer + if (capacity_ != 0) { + memset(mutable_data_ + size_, 0, static_cast(capacity_ - size_)); + } } /// \brief Construct a new buffer that owns its memory from a std::string @@ -158,9 +161,12 @@ class ARROW_EXPORT Buffer { /// \note Can throw std::bad_alloc if buffer is large std::string ToString() const; - int64_t capacity() const { return capacity_; } + /// \brief Return a pointer to the buffer's data const uint8_t* data() const { return data_; } - + /// \brief Return a writable pointer to the buffer's data + /// + /// The buffer has to be mutable. Otherwise, an assertion may be thrown + /// or a null pointer may be returned. 
uint8_t* mutable_data() { #ifndef NDEBUG CheckMutable(); @@ -168,8 +174,12 @@ class ARROW_EXPORT Buffer { return mutable_data_; } + /// \brief Return the buffer's size in bytes int64_t size() const { return size_; } + /// \brief Return the buffer's capacity (number of allocated bytes) + int64_t capacity() const { return capacity_; } + std::shared_ptr parent() const { return parent_; } protected: @@ -188,26 +198,38 @@ class ARROW_EXPORT Buffer { ARROW_DISALLOW_COPY_AND_ASSIGN(Buffer); }; -/// Construct a view on passed buffer at the indicated offset and length. This -/// function cannot fail and does not error checking (except in debug builds) +/// \defgroup buffer-slicing-functions Functions for slicing buffers +/// +/// @{ + +/// \brief Construct a view on a buffer at the given offset and length. +/// +/// This function cannot fail and does not check for errors (except in debug builds) static inline std::shared_ptr SliceBuffer(const std::shared_ptr& buffer, const int64_t offset, const int64_t length) { return std::make_shared(buffer, offset, length); } +/// \brief Construct a view on a buffer at the given offset, up to the buffer's end. +/// +/// This function cannot fail and does not check for errors (except in debug builds) static inline std::shared_ptr SliceBuffer(const std::shared_ptr& buffer, const int64_t offset) { int64_t length = buffer->size() - offset; return SliceBuffer(buffer, offset, length); } -/// Construct a mutable buffer slice. If the parent buffer is not mutable, this -/// will abort in debug builds +/// \brief Like SliceBuffer, but construct a mutable buffer slice. +/// +/// If the parent buffer is not mutable, behavior is undefined (it may abort +/// in debug builds). ARROW_EXPORT std::shared_ptr SliceMutableBuffer(const std::shared_ptr& buffer, const int64_t offset, const int64_t length); +/// @} + /// \class MutableBuffer /// \brief A Buffer whose contents can be mutated. May or may not own its data. class ARROW_EXPORT MutableBuffer : public Buffer { @@ -241,10 +263,11 @@ class ARROW_EXPORT ResizableBuffer : public MutableBuffer { /// Change buffer reported size to indicated size, allocating memory if /// necessary. This will ensure that the capacity of the buffer is a multiple /// of 64 bytes as defined in Layout.md. - /// Consider using ZeroPadding afterwards, in case you return buffer to a reader. + /// Consider using ZeroPadding afterwards, to conform to the Arrow layout + /// specification. /// - /// @param shrink_to_fit On deactivating this option, the capacity of the Buffer won't - /// decrease. + /// @param new_size The new size for the buffer. + /// @param shrink_to_fit Whether to shrink the capacity if new size < current size virtual Status Resize(const int64_t new_size, bool shrink_to_fit = true) = 0; /// Ensure that buffer has enough memory allocated to fit the indicated @@ -266,6 +289,10 @@ class ARROW_EXPORT ResizableBuffer : public MutableBuffer { ResizableBuffer(uint8_t* data, int64_t size) : MutableBuffer(data, size) {} }; +/// \defgroup buffer-allocation-functions Functions for allocating buffers +/// +/// @{ + /// \brief Allocate a fixed size mutable buffer from a memory pool, zero its padding. /// /// \param[in] pool a memory pool @@ -344,7 +371,8 @@ Status AllocateResizableBuffer(const int64_t size, std::shared_ptr* out); -/// \brief Allocate a zero-initialized bitmap buffer from a memory pool +/// \brief Allocate a bitmap buffer from a memory pool +/// no guarantee on values is provided. 
/// /// \param[in] pool memory pool to allocate memory from /// \param[in] length size in bits of bitmap to allocate @@ -352,6 +380,16 @@ Status AllocateResizableBuffer(const int64_t size, std::unique_ptr* out); + +/// \brief Allocate a zero-initialized bitmap buffer from a memory pool +/// +/// \param[in] pool memory pool to allocate memory from +/// \param[in] length size in bits of bitmap to allocate +/// \param[out] out the resulting buffer (zero-initialized). +/// +/// \return Status message +ARROW_EXPORT Status AllocateEmptyBitmap(MemoryPool* pool, int64_t length, std::shared_ptr* out); @@ -364,150 +402,7 @@ Status AllocateEmptyBitmap(MemoryPool* pool, int64_t length, ARROW_EXPORT Status AllocateEmptyBitmap(int64_t length, std::shared_ptr* out); -// ---------------------------------------------------------------------- -// Buffer builder classes - -/// \class BufferBuilder -/// \brief A class for incrementally building a contiguous chunk of in-memory data -class ARROW_EXPORT BufferBuilder { - public: - explicit BufferBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT) - : pool_(pool), data_(NULLPTR), capacity_(0), size_(0) {} - - /// \brief Resizes the buffer to the nearest multiple of 64 bytes - /// - /// \param elements the new capacity of the of the builder. Will be rounded - /// up to a multiple of 64 bytes for padding - /// \param shrink_to_fit if new capacity smaller than existing size, - /// reallocate internal buffer. Set to false to avoid reallocations when - /// shrinking the builder - /// \return Status - Status Resize(const int64_t elements, bool shrink_to_fit = true) { - // Resize(0) is a no-op - if (elements == 0) { - return Status::OK(); - } - int64_t old_capacity = capacity_; - - if (buffer_ == NULLPTR) { - ARROW_RETURN_NOT_OK(AllocateResizableBuffer(pool_, elements, &buffer_)); - } else { - ARROW_RETURN_NOT_OK(buffer_->Resize(elements, shrink_to_fit)); - } - capacity_ = buffer_->capacity(); - data_ = buffer_->mutable_data(); - if (capacity_ > old_capacity) { - memset(data_ + old_capacity, 0, capacity_ - old_capacity); - } - return Status::OK(); - } - - /// \brief Ensure that builder can accommodate the additional number of bytes - /// without the need to perform allocations - /// - /// \param size number of additional bytes to make space for - /// \return Status - Status Reserve(const int64_t size) { return Resize(size_ + size, false); } - - Status Append(const void* data, int64_t length) { - if (capacity_ < length + size_) { - int64_t new_capacity = BitUtil::NextPower2(length + size_); - ARROW_RETURN_NOT_OK(Resize(new_capacity)); - } - UnsafeAppend(data, length); - return Status::OK(); - } - - template - Status Append(const std::array& data) { - constexpr auto nbytes = static_cast(NBYTES); - if (capacity_ < nbytes + size_) { - int64_t new_capacity = BitUtil::NextPower2(nbytes + size_); - ARROW_RETURN_NOT_OK(Resize(new_capacity)); - } - - std::copy(data.cbegin(), data.cend(), data_ + size_); - size_ += nbytes; - return Status::OK(); - } - - // Advance pointer and zero out memory - Status Advance(const int64_t length) { - if (capacity_ < length + size_) { - int64_t new_capacity = BitUtil::NextPower2(length + size_); - ARROW_RETURN_NOT_OK(Resize(new_capacity)); - } - memset(data_ + size_, 0, static_cast(length)); - size_ += length; - return Status::OK(); - } - - // Unsafe methods don't check existing size - void UnsafeAppend(const void* data, int64_t length) { - memcpy(data_ + size_, data, static_cast(length)); - size_ += length; - } - - Status Finish(std::shared_ptr* 
out, bool shrink_to_fit = true) { - ARROW_RETURN_NOT_OK(Resize(size_, shrink_to_fit)); - *out = buffer_; - Reset(); - return Status::OK(); - } - - void Reset() { - buffer_ = NULLPTR; - capacity_ = size_ = 0; - } - - int64_t capacity() const { return capacity_; } - int64_t length() const { return size_; } - const uint8_t* data() const { return data_; } - - protected: - std::shared_ptr buffer_; - MemoryPool* pool_; - uint8_t* data_; - int64_t capacity_; - int64_t size_; -}; - -template -class ARROW_EXPORT TypedBufferBuilder : public BufferBuilder { - public: - explicit TypedBufferBuilder(MemoryPool* pool) : BufferBuilder(pool) {} - - Status Append(T arithmetic_value) { - static_assert(std::is_arithmetic::value, - "Convenience buffer append only supports arithmetic types"); - return BufferBuilder::Append(reinterpret_cast(&arithmetic_value), - sizeof(T)); - } - - Status Append(const T* arithmetic_values, int64_t num_elements) { - static_assert(std::is_arithmetic::value, - "Convenience buffer append only supports arithmetic types"); - return BufferBuilder::Append(reinterpret_cast(arithmetic_values), - num_elements * sizeof(T)); - } - - void UnsafeAppend(T arithmetic_value) { - static_assert(std::is_arithmetic::value, - "Convenience buffer append only supports arithmetic types"); - BufferBuilder::UnsafeAppend(reinterpret_cast(&arithmetic_value), sizeof(T)); - } - - void UnsafeAppend(const T* arithmetic_values, int64_t num_elements) { - static_assert(std::is_arithmetic::value, - "Convenience buffer append only supports arithmetic types"); - BufferBuilder::UnsafeAppend(reinterpret_cast(arithmetic_values), - num_elements * sizeof(T)); - } - - const T* data() const { return reinterpret_cast(data_); } - int64_t length() const { return size_ / sizeof(T); } - int64_t capacity() const { return capacity_ / sizeof(T); } -}; +/// @} } // namespace arrow diff --git a/cpp/src/arrow/builder-benchmark.cc b/cpp/src/arrow/builder-benchmark.cc index f96728dcd4fdf..e4a56bf103ef7 100644 --- a/cpp/src/arrow/builder-benchmark.cc +++ b/cpp/src/arrow/builder-benchmark.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -148,7 +149,7 @@ static void BM_BuildBooleanArrayNoNulls( constexpr uint8_t bit_pattern = 0xcc; // 0b11001100 uint64_t index = 0; std::generate(data.begin(), data.end(), - [&index]() -> uint8_t { return (bit_pattern >> ((index++) % 8)) & 1; }); + [&]() -> uint8_t { return (bit_pattern >> ((index++) % 8)) & 1; }); while (state.KeepRunning()) { BooleanBuilder builder; @@ -163,10 +164,11 @@ static void BM_BuildBooleanArrayNoNulls( } static void BM_BuildBinaryArray(benchmark::State& state) { // NOLINT non-const reference - const int64_t iterations = 1 << 20; - + // About 160MB + const int64_t iterations = 1 << 24; std::string value = "1234567890"; - while (state.KeepRunning()) { + + for (auto _ : state) { BinaryBuilder builder; for (int64_t i = 0; i < iterations; i++) { ABORT_NOT_OK(builder.Append(value)); @@ -177,6 +179,26 @@ static void BM_BuildBinaryArray(benchmark::State& state) { // NOLINT non-const state.SetBytesProcessed(state.iterations() * iterations * value.size()); } +static void BM_BuildChunkedBinaryArray( + benchmark::State& state) { // NOLINT non-const reference + // About 160MB + const int64_t iterations = 1 << 24; + std::string value = "1234567890"; + + for (auto _ : state) { + // 1MB chunks + const int32_t chunksize = 1 << 20; + internal::ChunkedBinaryBuilder builder(chunksize); + for (int64_t i = 0; i < iterations; i++) { + 
ABORT_NOT_OK(builder.Append(reinterpret_cast(value.data()), + static_cast(value.size()))); + } + ArrayVector out; + ABORT_NOT_OK(builder.Finish(&out)); + } + state.SetBytesProcessed(state.iterations() * iterations * value.size()); +} + static void BM_BuildFixedSizeBinaryArray( benchmark::State& state) { // NOLINT non-const reference const int64_t iterations = 1 << 20; @@ -271,13 +293,13 @@ static std::vector MakeStringDictFodder(int32_t n_values, *it++ = "abcfgh"; // Add random strings std::uniform_int_distribution length_dist(2, 20); - std::independent_bits_engine bytes_gen(42); + std::independent_bits_engine bytes_gen(42); - std::generate(it, values_dict.end(), [&]() { + std::generate(it, values_dict.end(), [&] { auto length = length_dist(gen); std::string s(length, 'X'); for (int32_t i = 0; i < length; ++i) { - s[i] = bytes_gen(); + s[i] = static_cast(bytes_gen()); } return s; }); @@ -285,7 +307,7 @@ static std::vector MakeStringDictFodder(int32_t n_values, { std::uniform_int_distribution indices_dist(0, n_distinct - 1); std::generate(values.begin(), values.end(), - [&]() { return values_dict[indices_dist(gen)]; }); + [&] { return values_dict[indices_dist(gen)]; }); } return values; } @@ -328,7 +350,7 @@ static void BM_BuildStringDictionaryArray( const auto fodder = MakeStringDictFodder(10000, 100); auto type = binary(); auto fodder_size = - std::accumulate(fodder.begin(), fodder.end(), 0, + std::accumulate(fodder.begin(), fodder.end(), static_cast(0), [&](size_t acc, const std::string& s) { return acc + s.size(); }); while (state.KeepRunning()) { @@ -371,10 +393,9 @@ BENCHMARK(BM_BuildAdaptiveUIntNoNullsScalarAppend) ->Repetitions(kRepetitions) ->Unit(benchmark::kMicrosecond); -BENCHMARK(BM_BuildBinaryArray)->Repetitions(kRepetitions)->Unit(benchmark::kMicrosecond); -BENCHMARK(BM_BuildFixedSizeBinaryArray) - ->Repetitions(kRepetitions) - ->Unit(benchmark::kMicrosecond); +BENCHMARK(BM_BuildBinaryArray)->MinTime(1.0)->Unit(benchmark::kMicrosecond); +BENCHMARK(BM_BuildChunkedBinaryArray)->MinTime(1.0)->Unit(benchmark::kMicrosecond); +BENCHMARK(BM_BuildFixedSizeBinaryArray)->MinTime(3.0)->Unit(benchmark::kMicrosecond); BENCHMARK(BM_BuildInt64DictionaryArrayRandom) ->Repetitions(kRepetitions) diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc index 0e10be7ff46f4..2072edc936a3c 100644 --- a/cpp/src/arrow/builder.cc +++ b/cpp/src/arrow/builder.cc @@ -16,1359 +16,19 @@ // under the License. 
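The chunked benchmark above drives internal::ChunkedBinaryBuilder, which starts a fresh chunk whenever the current one reaches the configured size; that keeps each chunk's int32 offsets far from overflow and yields one Array per chunk. A condensed sketch of the same call pattern (editorial illustration; the header location is an assumption):

#include <string>
#include "arrow/api.h"
#include "arrow/array/builder_binary.h"  // assumed home of ChunkedBinaryBuilder

arrow::Status BuildChunked(arrow::ArrayVector* out) {
  const int32_t kChunkSize = 1 << 20;  // 1MB chunks, as in the benchmark
  arrow::internal::ChunkedBinaryBuilder builder(kChunkSize);
  const std::string value = "1234567890";
  for (int64_t i = 0; i < 1000; ++i) {
    ARROW_RETURN_NOT_OK(
        builder.Append(reinterpret_cast<const uint8_t*>(value.data()),
                       static_cast<int32_t>(value.size())));
  }
  return builder.Finish(out);  // one finished Array per completed chunk
}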
#include "arrow/builder.h" -#include -#include -#include -#include -#include + #include +#include #include #include -#include "arrow/array.h" -#include "arrow/buffer.h" #include "arrow/status.h" #include "arrow/type.h" -#include "arrow/type_traits.h" -#include "arrow/util/bit-util.h" #include "arrow/util/checked_cast.h" -#include "arrow/util/decimal.h" -#include "arrow/util/hashing.h" -#include "arrow/util/int-util.h" -#include "arrow/util/logging.h" namespace arrow { -using internal::AdaptiveIntBuilderBase; -using internal::checked_cast; - -namespace { - -Status TrimBuffer(const int64_t bytes_filled, ResizableBuffer* buffer) { - if (buffer) { - if (bytes_filled < buffer->size()) { - // Trim buffer - RETURN_NOT_OK(buffer->Resize(bytes_filled)); - } - // zero the padding - buffer->ZeroPadding(); - } else { - // Null buffers are allowed in place of 0-byte buffers - DCHECK_EQ(bytes_filled, 0); - } - return Status::OK(); -} - -} // namespace - -Status ArrayBuilder::AppendToBitmap(bool is_valid) { - if (length_ == capacity_) { - // If the capacity was not already a multiple of 2, do so here - // TODO(emkornfield) doubling isn't great default allocation practice - // see https://github.com/facebook/folly/blob/master/folly/docs/FBVector.md - // fo discussion - RETURN_NOT_OK(Resize(BitUtil::NextPower2(capacity_ + 1))); - } - UnsafeAppendToBitmap(is_valid); - return Status::OK(); -} - -Status ArrayBuilder::AppendToBitmap(const uint8_t* valid_bytes, int64_t length) { - RETURN_NOT_OK(Reserve(length)); - - UnsafeAppendToBitmap(valid_bytes, length); - return Status::OK(); -} - -static inline Status CheckCapacity(int64_t new_capacity, int64_t old_capacity) { - if (new_capacity < 0) return Status::Invalid("Resize capacity must be positive"); - if (new_capacity < old_capacity) return Status::Invalid("Resize cannot downsize"); - - return Status::OK(); -} - -Status ArrayBuilder::Resize(int64_t capacity) { - // Target size of validity (null) bitmap data - const int64_t new_bitmap_size = BitUtil::BytesForBits(capacity); - RETURN_NOT_OK(CheckCapacity(capacity, capacity_)); - - if (capacity_ == 0) { - RETURN_NOT_OK(AllocateResizableBuffer(pool_, new_bitmap_size, &null_bitmap_)); - null_bitmap_data_ = null_bitmap_->mutable_data(); - - // Padding is zeroed by AllocateResizableBuffer - memset(null_bitmap_data_, 0, static_cast(new_bitmap_size)); - } else { - const int64_t old_bitmap_capacity = null_bitmap_->capacity(); - RETURN_NOT_OK(null_bitmap_->Resize(new_bitmap_size)); - - const int64_t new_bitmap_capacity = null_bitmap_->capacity(); - null_bitmap_data_ = null_bitmap_->mutable_data(); - - // Zero the region between the original capacity and the new capacity, - // including padding, which has not been zeroed, unlike - // AllocateResizableBuffer - if (old_bitmap_capacity < new_bitmap_capacity) { - memset(null_bitmap_data_ + old_bitmap_capacity, 0, - static_cast(new_bitmap_capacity - old_bitmap_capacity)); - } - } - capacity_ = capacity; - return Status::OK(); -} - -Status ArrayBuilder::Advance(int64_t elements) { - if (length_ + elements > capacity_) { - return Status::Invalid("Builder must be expanded"); - } - length_ += elements; - return Status::OK(); -} - -Status ArrayBuilder::Finish(std::shared_ptr* out) { - std::shared_ptr internal_data; - RETURN_NOT_OK(FinishInternal(&internal_data)); - *out = MakeArray(internal_data); - return Status::OK(); -} - -Status ArrayBuilder::Reserve(int64_t additional_elements) { - if (length_ + additional_elements > capacity_) { - // TODO(emkornfield) power of 2 growth is 
potentially suboptimal - int64_t new_size = BitUtil::NextPower2(length_ + additional_elements); - return Resize(new_size); - } - return Status::OK(); -} - -void ArrayBuilder::Reset() { - capacity_ = length_ = null_count_ = 0; - null_bitmap_ = nullptr; -} - -Status ArrayBuilder::SetNotNull(int64_t length) { - RETURN_NOT_OK(Reserve(length)); - UnsafeSetNotNull(length); - return Status::OK(); -} - -void ArrayBuilder::UnsafeAppendToBitmap(const uint8_t* valid_bytes, int64_t length) { - if (valid_bytes == nullptr) { - UnsafeSetNotNull(length); - return; - } - UnsafeAppendToBitmap(valid_bytes, valid_bytes + length); -} - -void ArrayBuilder::UnsafeAppendToBitmap(const std::vector& is_valid) { - UnsafeAppendToBitmap(is_valid.begin(), is_valid.end()); -} - -void ArrayBuilder::UnsafeSetNotNull(int64_t length) { - const int64_t new_length = length + length_; - - // Fill up the bytes until we have a byte alignment - int64_t pad_to_byte = std::min(8 - (length_ % 8), length); - - if (pad_to_byte == 8) { - pad_to_byte = 0; - } - for (int64_t i = length_; i < length_ + pad_to_byte; ++i) { - BitUtil::SetBit(null_bitmap_data_, i); - } - - // Fast bitsetting - int64_t fast_length = (length - pad_to_byte) / 8; - memset(null_bitmap_data_ + ((length_ + pad_to_byte) / 8), 0xFF, - static_cast(fast_length)); - - // Trailing bits - for (int64_t i = length_ + pad_to_byte + (fast_length * 8); i < new_length; ++i) { - BitUtil::SetBit(null_bitmap_data_, i); - } - - length_ = new_length; -} - -// ---------------------------------------------------------------------- -// Null builder - -Status NullBuilder::FinishInternal(std::shared_ptr* out) { - *out = ArrayData::Make(null(), length_, {nullptr}, length_); - length_ = null_count_ = 0; - return Status::OK(); -} - -// ---------------------------------------------------------------------- - -template -void PrimitiveBuilder::Reset() { - data_.reset(); - raw_data_ = nullptr; -} - -template -Status PrimitiveBuilder::Resize(int64_t capacity) { - RETURN_NOT_OK(CheckCapacity(capacity, capacity_)); - capacity = std::max(capacity, kMinBuilderCapacity); - - int64_t nbytes = TypeTraits::bytes_required(capacity); - if (capacity_ == 0) { - RETURN_NOT_OK(AllocateResizableBuffer(pool_, nbytes, &data_)); - } else { - RETURN_NOT_OK(data_->Resize(nbytes)); - } - - raw_data_ = reinterpret_cast(data_->mutable_data()); - return ArrayBuilder::Resize(capacity); -} - -template -Status PrimitiveBuilder::AppendValues(const value_type* values, int64_t length, - const uint8_t* valid_bytes) { - RETURN_NOT_OK(Reserve(length)); - - if (length > 0) { - std::memcpy(raw_data_ + length_, values, - static_cast(TypeTraits::bytes_required(length))); - } - - // length_ is update by these - ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length); - return Status::OK(); -} - -template -Status PrimitiveBuilder::AppendValues(const value_type* values, int64_t length, - const std::vector& is_valid) { - RETURN_NOT_OK(Reserve(length)); - DCHECK_EQ(length, static_cast(is_valid.size())); - - if (length > 0) { - std::memcpy(raw_data_ + length_, values, - static_cast(TypeTraits::bytes_required(length))); - } - - // length_ is update by these - ArrayBuilder::UnsafeAppendToBitmap(is_valid); - return Status::OK(); -} - -template -Status PrimitiveBuilder::AppendValues(const std::vector& values, - const std::vector& is_valid) { - return AppendValues(values.data(), static_cast(values.size()), is_valid); -} - -template -Status PrimitiveBuilder::AppendValues(const std::vector& values) { - return AppendValues(values.data(), 
static_cast(values.size())); -} - -template -Status PrimitiveBuilder::FinishInternal(std::shared_ptr* out) { - RETURN_NOT_OK(TrimBuffer(BitUtil::BytesForBits(length_), null_bitmap_.get())); - RETURN_NOT_OK(TrimBuffer(TypeTraits::bytes_required(length_), data_.get())); - - *out = ArrayData::Make(type_, length_, {null_bitmap_, data_}, null_count_); - - data_ = null_bitmap_ = nullptr; - capacity_ = length_ = null_count_ = 0; - - return Status::OK(); -} - -template class PrimitiveBuilder; -template class PrimitiveBuilder; -template class PrimitiveBuilder; -template class PrimitiveBuilder; -template class PrimitiveBuilder; -template class PrimitiveBuilder; -template class PrimitiveBuilder; -template class PrimitiveBuilder; -template class PrimitiveBuilder; -template class PrimitiveBuilder; -template class PrimitiveBuilder; -template class PrimitiveBuilder; -template class PrimitiveBuilder; -template class PrimitiveBuilder; -template class PrimitiveBuilder; -template class PrimitiveBuilder; - -AdaptiveIntBuilderBase::AdaptiveIntBuilderBase(MemoryPool* pool) - : ArrayBuilder(int64(), pool), - data_(nullptr), - raw_data_(nullptr), - int_size_(1), - pending_pos_(0), - pending_has_nulls_(false) {} - -void AdaptiveIntBuilderBase::Reset() { - ArrayBuilder::Reset(); - data_.reset(); - raw_data_ = nullptr; - pending_pos_ = 0; - pending_has_nulls_ = false; -} - -Status AdaptiveIntBuilderBase::Resize(int64_t capacity) { - RETURN_NOT_OK(CheckCapacity(capacity, capacity_)); - capacity = std::max(capacity, kMinBuilderCapacity); - - int64_t nbytes = capacity * int_size_; - if (capacity_ == 0) { - RETURN_NOT_OK(AllocateResizableBuffer(pool_, nbytes, &data_)); - } else { - RETURN_NOT_OK(data_->Resize(nbytes)); - } - raw_data_ = reinterpret_cast(data_->mutable_data()); - - return ArrayBuilder::Resize(capacity); -} - -AdaptiveIntBuilder::AdaptiveIntBuilder(MemoryPool* pool) : AdaptiveIntBuilderBase(pool) {} - -Status AdaptiveIntBuilder::FinishInternal(std::shared_ptr* out) { - RETURN_NOT_OK(CommitPendingData()); - - std::shared_ptr output_type; - switch (int_size_) { - case 1: - output_type = int8(); - break; - case 2: - output_type = int16(); - break; - case 4: - output_type = int32(); - break; - case 8: - output_type = int64(); - break; - default: - DCHECK(false); - return Status::NotImplemented("Only ints of size 1,2,4,8 are supported"); - } - - RETURN_NOT_OK(TrimBuffer(BitUtil::BytesForBits(length_), null_bitmap_.get())); - RETURN_NOT_OK(TrimBuffer(length_ * int_size_, data_.get())); - - *out = ArrayData::Make(output_type, length_, {null_bitmap_, data_}, null_count_); - - data_ = null_bitmap_ = nullptr; - capacity_ = length_ = null_count_ = 0; - return Status::OK(); -} - -Status AdaptiveIntBuilder::CommitPendingData() { - if (pending_pos_ == 0) { - return Status::OK(); - } - RETURN_NOT_OK(Reserve(pending_pos_)); - const uint8_t* valid_bytes = pending_has_nulls_ ? pending_valid_ : nullptr; - RETURN_NOT_OK(AppendValuesInternal(reinterpret_cast(pending_data_), - pending_pos_, valid_bytes)); - pending_has_nulls_ = false; - pending_pos_ = 0; - return Status::OK(); -} - -static constexpr int64_t kAdaptiveIntChunkSize = 8192; - -Status AdaptiveIntBuilder::AppendValuesInternal(const int64_t* values, int64_t length, - const uint8_t* valid_bytes) { - while (length > 0) { - // In case `length` is very large, we don't want to trash the cache by - // scanning it twice (first to detect int width, second to copy the data). - // Instead, process data in L2-cacheable chunks. 
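(Editorial aside on the comment above: with kAdaptiveIntChunkSize = 8192 and 8-byte input values, one chunk is 8192 * 8 = 64 KiB, small enough to stay resident in a typical L2 cache while it is scanned twice, once by DetectIntWidth and once by DowncastInts below.)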
- const int64_t chunk_size = std::min(length, kAdaptiveIntChunkSize); - - uint8_t new_int_size; - new_int_size = internal::DetectIntWidth(values, valid_bytes, chunk_size, int_size_); - - DCHECK_GE(new_int_size, int_size_); - if (new_int_size > int_size_) { - // This updates int_size_ - RETURN_NOT_OK(ExpandIntSize(new_int_size)); - } - - switch (int_size_) { - case 1: - internal::DowncastInts(values, reinterpret_cast(raw_data_) + length_, - chunk_size); - break; - case 2: - internal::DowncastInts(values, reinterpret_cast(raw_data_) + length_, - chunk_size); - break; - case 4: - internal::DowncastInts(values, reinterpret_cast(raw_data_) + length_, - chunk_size); - break; - case 8: - internal::DowncastInts(values, reinterpret_cast(raw_data_) + length_, - chunk_size); - break; - default: - DCHECK(false); - } - - // This updates length_ - ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, chunk_size); - values += chunk_size; - if (valid_bytes != nullptr) { - valid_bytes += chunk_size; - } - length -= chunk_size; - } - - return Status::OK(); -} - -Status AdaptiveUIntBuilder::CommitPendingData() { - if (pending_pos_ == 0) { - return Status::OK(); - } - RETURN_NOT_OK(Reserve(pending_pos_)); - const uint8_t* valid_bytes = pending_has_nulls_ ? pending_valid_ : nullptr; - RETURN_NOT_OK(AppendValuesInternal(pending_data_, pending_pos_, valid_bytes)); - pending_has_nulls_ = false; - pending_pos_ = 0; - return Status::OK(); -} - -Status AdaptiveIntBuilder::AppendValues(const int64_t* values, int64_t length, - const uint8_t* valid_bytes) { - RETURN_NOT_OK(CommitPendingData()); - RETURN_NOT_OK(Reserve(length)); - - return AppendValuesInternal(values, length, valid_bytes); -} - -template -typename std::enable_if= sizeof(new_type), Status>::type -AdaptiveIntBuilder::ExpandIntSizeInternal() { - return Status::OK(); -} - -#define __LESS(a, b) (a) < (b) -template -typename std::enable_if<__LESS(sizeof(old_type), sizeof(new_type)), Status>::type -AdaptiveIntBuilder::ExpandIntSizeInternal() { - int_size_ = sizeof(new_type); - RETURN_NOT_OK(Resize(data_->size() / sizeof(old_type))); - raw_data_ = reinterpret_cast(data_->mutable_data()); - const old_type* src = reinterpret_cast(raw_data_); - new_type* dst = reinterpret_cast(raw_data_); - - // By doing the backward copy, we ensure that no element is overriden during - // the copy process and the copy stays in-place. 
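(Editorial aside illustrating the backward copy: widening three int8 values {1, 2, 3} to int16 in the same buffer, copy_backward writes element 2 into bytes 4-5, then element 1 into bytes 2-3, then element 0 into bytes 0-1. A forward copy would overwrite the byte holding source element 1 while writing widened element 0.)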
- std::copy_backward(src, src + length_, dst + length_); - - return Status::OK(); -} -#undef __LESS - -template -Status AdaptiveIntBuilder::ExpandIntSizeN() { - switch (int_size_) { - case 1: - RETURN_NOT_OK((ExpandIntSizeInternal())); - break; - case 2: - RETURN_NOT_OK((ExpandIntSizeInternal())); - break; - case 4: - RETURN_NOT_OK((ExpandIntSizeInternal())); - break; - case 8: - RETURN_NOT_OK((ExpandIntSizeInternal())); - break; - default: - DCHECK(false); - } - return Status::OK(); -} - -Status AdaptiveIntBuilder::ExpandIntSize(uint8_t new_int_size) { - switch (new_int_size) { - case 1: - RETURN_NOT_OK((ExpandIntSizeN())); - break; - case 2: - RETURN_NOT_OK((ExpandIntSizeN())); - break; - case 4: - RETURN_NOT_OK((ExpandIntSizeN())); - break; - case 8: - RETURN_NOT_OK((ExpandIntSizeN())); - break; - default: - DCHECK(false); - } - return Status::OK(); -} - -AdaptiveUIntBuilder::AdaptiveUIntBuilder(MemoryPool* pool) - : AdaptiveIntBuilderBase(pool) {} - -Status AdaptiveUIntBuilder::FinishInternal(std::shared_ptr* out) { - RETURN_NOT_OK(CommitPendingData()); - - std::shared_ptr output_type; - switch (int_size_) { - case 1: - output_type = uint8(); - break; - case 2: - output_type = uint16(); - break; - case 4: - output_type = uint32(); - break; - case 8: - output_type = uint64(); - break; - default: - DCHECK(false); - return Status::NotImplemented("Only ints of size 1,2,4,8 are supported"); - } - - RETURN_NOT_OK(TrimBuffer(BitUtil::BytesForBits(length_), null_bitmap_.get())); - RETURN_NOT_OK(TrimBuffer(length_ * int_size_, data_.get())); - - *out = ArrayData::Make(output_type, length_, {null_bitmap_, data_}, null_count_); - - data_ = null_bitmap_ = nullptr; - capacity_ = length_ = null_count_ = 0; - return Status::OK(); -} - -Status AdaptiveUIntBuilder::AppendValuesInternal(const uint64_t* values, int64_t length, - const uint8_t* valid_bytes) { - while (length > 0) { - // See AdaptiveIntBuilder::AppendValuesInternal - const int64_t chunk_size = std::min(length, kAdaptiveIntChunkSize); - - uint8_t new_int_size; - new_int_size = internal::DetectUIntWidth(values, valid_bytes, chunk_size, int_size_); - - DCHECK_GE(new_int_size, int_size_); - if (new_int_size > int_size_) { - // This updates int_size_ - RETURN_NOT_OK(ExpandIntSize(new_int_size)); - } - - switch (int_size_) { - case 1: - internal::DowncastUInts(values, reinterpret_cast(raw_data_) + length_, - chunk_size); - break; - case 2: - internal::DowncastUInts(values, reinterpret_cast(raw_data_) + length_, - chunk_size); - break; - case 4: - internal::DowncastUInts(values, reinterpret_cast(raw_data_) + length_, - chunk_size); - break; - case 8: - internal::DowncastUInts(values, reinterpret_cast(raw_data_) + length_, - chunk_size); - break; - default: - DCHECK(false); - } - - // This updates length_ - ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, chunk_size); - values += chunk_size; - if (valid_bytes != nullptr) { - valid_bytes += chunk_size; - } - length -= chunk_size; - } - - return Status::OK(); -} - -Status AdaptiveUIntBuilder::AppendValues(const uint64_t* values, int64_t length, - const uint8_t* valid_bytes) { - RETURN_NOT_OK(Reserve(length)); - - return AppendValuesInternal(values, length, valid_bytes); -} - -template -typename std::enable_if= sizeof(new_type), Status>::type -AdaptiveUIntBuilder::ExpandIntSizeInternal() { - return Status::OK(); -} - -#define __LESS(a, b) (a) < (b) -template -typename std::enable_if<__LESS(sizeof(old_type), sizeof(new_type)), Status>::type -AdaptiveUIntBuilder::ExpandIntSizeInternal() { - int_size_ = 
-#define __LESS(a, b) (a) < (b)
-template <typename old_type, typename new_type>
-typename std::enable_if<__LESS(sizeof(old_type), sizeof(new_type)), Status>::type
-AdaptiveUIntBuilder::ExpandIntSizeInternal() {
-  int_size_ = sizeof(new_type);
-  RETURN_NOT_OK(Resize(data_->size() / sizeof(old_type)));
-
-  old_type* src = reinterpret_cast<old_type*>(raw_data_);
-  new_type* dst = reinterpret_cast<new_type*>(raw_data_);
-  // By doing the backward copy, we ensure that no element is overwritten during
-  // the copy process and the copy stays in-place.
-  std::copy_backward(src, src + length_, dst + length_);
-
-  return Status::OK();
-}
-#undef __LESS
-
-template <typename new_type>
-Status AdaptiveUIntBuilder::ExpandIntSizeN() {
-  switch (int_size_) {
-    case 1:
-      RETURN_NOT_OK((ExpandIntSizeInternal<uint8_t, new_type>()));
-      break;
-    case 2:
-      RETURN_NOT_OK((ExpandIntSizeInternal<uint16_t, new_type>()));
-      break;
-    case 4:
-      RETURN_NOT_OK((ExpandIntSizeInternal<uint32_t, new_type>()));
-      break;
-    case 8:
-      RETURN_NOT_OK((ExpandIntSizeInternal<uint64_t, new_type>()));
-      break;
-    default:
-      DCHECK(false);
-  }
-  return Status::OK();
-}
-
-Status AdaptiveUIntBuilder::ExpandIntSize(uint8_t new_int_size) {
-  switch (new_int_size) {
-    case 1:
-      RETURN_NOT_OK((ExpandIntSizeN<uint8_t>()));
-      break;
-    case 2:
-      RETURN_NOT_OK((ExpandIntSizeN<uint16_t>()));
-      break;
-    case 4:
-      RETURN_NOT_OK((ExpandIntSizeN<uint32_t>()));
-      break;
-    case 8:
-      RETURN_NOT_OK((ExpandIntSizeN<uint64_t>()));
-      break;
-    default:
-      DCHECK(false);
-  }
-  return Status::OK();
-}
-
-BooleanBuilder::BooleanBuilder(MemoryPool* pool)
-    : ArrayBuilder(boolean(), pool), data_(nullptr), raw_data_(nullptr) {}
-
-BooleanBuilder::BooleanBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
-    : BooleanBuilder(pool) {
-  DCHECK_EQ(Type::BOOL, type->id());
-}
-
-void BooleanBuilder::Reset() {
-  ArrayBuilder::Reset();
-  data_.reset();
-  raw_data_ = nullptr;
-}
-
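BooleanBuilder stores its values the same way the validity bitmap is stored: one bit per element, least-significant bit first within each byte, over memory that Resize() (below) zeroes up front. A hedged sketch of that packing rule, independent of the arrow::internal::GenerateBitsUnrolled helper that the bulk AppendValues overloads actually use:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Pack bools LSB-first: value i lands in byte i / 8, at bit i % 8.
void PackBits(const std::vector<bool>& values, uint8_t* out) {
  for (size_t i = 0; i < values.size(); ++i) {
    if (values[i]) {
      out[i / 8] = static_cast<uint8_t>(out[i / 8] | (1u << (i % 8)));   // set bit
    } else {
      out[i / 8] = static_cast<uint8_t>(out[i / 8] & ~(1u << (i % 8)));  // clear bit
    }
  }
}

int main() {
  std::vector<bool> values = {true, false, true, true};
  uint8_t bitmap[1] = {0};  // pre-zeroed, as BooleanBuilder::Resize does
  PackBits(values, bitmap);
  std::printf("%#x\n", bitmap[0]);  // 0xd == 0b1101: bits 0, 2, 3 set
  return 0;
}
```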
-Status BooleanBuilder::Resize(int64_t capacity) {
-  RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
-  capacity = std::max(capacity, kMinBuilderCapacity);
-
-  const int64_t new_bitmap_size = BitUtil::BytesForBits(capacity);
-  if (capacity_ == 0) {
-    RETURN_NOT_OK(AllocateResizableBuffer(pool_, new_bitmap_size, &data_));
-    raw_data_ = reinterpret_cast<uint8_t*>(data_->mutable_data());
-
-    // We zero the memory for booleans to keep things simple; for some reason if
-    // we do not, even though we may write every bit (through in-place | or &),
-    // valgrind will still show a warning. If we do not zero the bytes here, we
-    // will have to be careful to zero them in AppendNull and AppendNulls. Also,
-    // zeroing the bits results in deterministic bits when each byte may have a
-    // mix of nulls and not nulls.
-    //
-    // We only zero up to new_bitmap_size because the padding was zeroed by
-    // AllocateResizableBuffer
-    memset(raw_data_, 0, static_cast<size_t>(new_bitmap_size));
-  } else {
-    const int64_t old_bitmap_capacity = data_->capacity();
-    RETURN_NOT_OK(data_->Resize(new_bitmap_size));
-    const int64_t new_bitmap_capacity = data_->capacity();
-    raw_data_ = reinterpret_cast<uint8_t*>(data_->mutable_data());
-
-    // See comment above about why we zero memory for booleans
-    memset(raw_data_ + old_bitmap_capacity, 0,
-           static_cast<size_t>(new_bitmap_capacity - old_bitmap_capacity));
-  }
-
-  return ArrayBuilder::Resize(capacity);
-}
-
-Status BooleanBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
-  int64_t bit_offset = length_ % 8;
-  if (bit_offset > 0) {
-    // Adjust last byte
-    data_->mutable_data()[length_ / 8] &= BitUtil::kPrecedingBitmask[bit_offset];
-  }
-
-  RETURN_NOT_OK(TrimBuffer(BitUtil::BytesForBits(length_), null_bitmap_.get()));
-  RETURN_NOT_OK(TrimBuffer(BitUtil::BytesForBits(length_), data_.get()));
-
-  *out = ArrayData::Make(boolean(), length_, {null_bitmap_, data_}, null_count_);
-
-  data_ = null_bitmap_ = nullptr;
-  capacity_ = length_ = null_count_ = 0;
-  return Status::OK();
-}
-
-Status BooleanBuilder::AppendValues(const uint8_t* values, int64_t length,
-                                    const uint8_t* valid_bytes) {
-  RETURN_NOT_OK(Reserve(length));
-
-  int64_t i = 0;
-  internal::GenerateBitsUnrolled(raw_data_, length_, length,
-                                 [values, &i]() -> bool { return values[i++] != 0; });
-
-  // this updates length_
-  ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length);
-  return Status::OK();
-}
-
-Status BooleanBuilder::AppendValues(const uint8_t* values, int64_t length,
-                                    const std::vector<bool>& is_valid) {
-  RETURN_NOT_OK(Reserve(length));
-  DCHECK_EQ(length, static_cast<int64_t>(is_valid.size()));
-
-  int64_t i = 0;
-  internal::GenerateBitsUnrolled(raw_data_, length_, length,
-                                 [values, &i]() -> bool { return values[i++]; });
-
-  // this updates length_
-  ArrayBuilder::UnsafeAppendToBitmap(is_valid);
-  return Status::OK();
-}
-
-Status BooleanBuilder::AppendValues(const std::vector<uint8_t>& values,
-                                    const std::vector<bool>& is_valid) {
-  return AppendValues(values.data(), static_cast<int64_t>(values.size()), is_valid);
-}
-
-Status BooleanBuilder::AppendValues(const std::vector<uint8_t>& values) {
-  return AppendValues(values.data(), static_cast<int64_t>(values.size()));
-}
-
-Status BooleanBuilder::AppendValues(const std::vector<bool>& values,
-                                    const std::vector<bool>& is_valid) {
-  const int64_t length = static_cast<int64_t>(values.size());
-  RETURN_NOT_OK(Reserve(length));
-  DCHECK_EQ(length, static_cast<int64_t>(is_valid.size()));
-
-  int64_t i = 0;
-  internal::GenerateBitsUnrolled(raw_data_, length_, length,
-                                 [&values, &i]() -> bool { return values[i++]; });
-
-  // this updates length_
-  ArrayBuilder::UnsafeAppendToBitmap(is_valid);
-  return Status::OK();
-}
-
-Status BooleanBuilder::AppendValues(const std::vector<bool>& values) {
-  const int64_t length = static_cast<int64_t>(values.size());
-  RETURN_NOT_OK(Reserve(length));
-
-  int64_t i = 0;
-  internal::GenerateBitsUnrolled(raw_data_, length_, length,
-                                 [&values, &i]() -> bool { return values[i++]; });
-
-  // this updates length_
-  ArrayBuilder::UnsafeSetNotNull(length);
-  return Status::OK();
-}
-
-// ----------------------------------------------------------------------
-// DictionaryBuilder
-
-template <typename T>
-class DictionaryBuilder<T>::MemoTableImpl
-    : public internal::HashTraits<T>::MemoTableType {
- public:
-  using MemoTableType = typename internal::HashTraits<T>::MemoTableType;
-  using MemoTableType::MemoTableType;
-};
-
-template <typename T>
-DictionaryBuilder<T>::~DictionaryBuilder() {}
-
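Before the DictionaryBuilder internals that follow, the encoding idea in one screen: each distinct value is memoized to a dense integer id, the builder emits only the ids, and Finish() materializes the distinct values as the dictionary. The sketch below uses std::unordered_map for clarity; the real builder uses the memo table from arrow/util/hashing.h (per the HashTraits alias above) and keeps it alive across Finish() calls, which is what makes the delta dictionaries mentioned later possible.

```cpp
#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>

struct DictionaryEncoderSketch {
  std::unordered_map<std::string, int32_t> memo;
  std::vector<std::string> dictionary;  // id -> value, in first-seen order
  std::vector<int32_t> indices;         // the encoded column

  void Append(const std::string& value) {
    auto inserted = memo.emplace(value, static_cast<int32_t>(dictionary.size()));
    if (inserted.second) {
      dictionary.push_back(value);  // first occurrence: new dictionary entry
    }
    indices.push_back(inserted.first->second);
  }
};

int main() {
  DictionaryEncoderSketch enc;
  for (const char* v : {"foo", "bar", "foo", "foo"}) {
    enc.Append(v);
  }
  // dictionary == {"foo", "bar"}, indices == {0, 1, 0, 0}
  return 0;
}
```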
-template -DictionaryBuilder::DictionaryBuilder(const std::shared_ptr& type, - MemoryPool* pool) - : ArrayBuilder(type, pool), byte_width_(-1), values_builder_(pool) { - DCHECK_EQ(T::type_id, type->id()) << "inconsistent type passed to DictionaryBuilder"; -} - -DictionaryBuilder::DictionaryBuilder(const std::shared_ptr& type, - MemoryPool* pool) - : ArrayBuilder(type, pool), values_builder_(pool) { - DCHECK_EQ(Type::NA, type->id()) << "inconsistent type passed to DictionaryBuilder"; -} - -template <> -DictionaryBuilder::DictionaryBuilder( - const std::shared_ptr& type, MemoryPool* pool) - : ArrayBuilder(type, pool), - byte_width_(checked_cast(*type).byte_width()) {} - -template -void DictionaryBuilder::Reset() { - ArrayBuilder::Reset(); - values_builder_.Reset(); - memo_table_.reset(); - delta_offset_ = 0; -} - -template -Status DictionaryBuilder::Resize(int64_t capacity) { - RETURN_NOT_OK(CheckCapacity(capacity, capacity_)); - capacity = std::max(capacity, kMinBuilderCapacity); - - if (capacity_ == 0) { - // Initialize hash table - // XXX should we let the user pass additional size heuristics? - memo_table_.reset(new MemoTableImpl(0)); - delta_offset_ = 0; - } - RETURN_NOT_OK(values_builder_.Resize(capacity)); - return ArrayBuilder::Resize(capacity); -} - -Status DictionaryBuilder::Resize(int64_t capacity) { - RETURN_NOT_OK(CheckCapacity(capacity, capacity_)); - capacity = std::max(capacity, kMinBuilderCapacity); - - RETURN_NOT_OK(values_builder_.Resize(capacity)); - return ArrayBuilder::Resize(capacity); -} - -template -Status DictionaryBuilder::Append(const Scalar& value) { - RETURN_NOT_OK(Reserve(1)); - - auto memo_index = memo_table_->GetOrInsert(value); - RETURN_NOT_OK(values_builder_.Append(memo_index)); - - return Status::OK(); -} - -template -Status DictionaryBuilder::AppendNull() { - return values_builder_.AppendNull(); -} - -Status DictionaryBuilder::AppendNull() { return values_builder_.AppendNull(); } - -template -Status DictionaryBuilder::AppendArray(const Array& array) { - const auto& numeric_array = checked_cast&>(array); - for (int64_t i = 0; i < array.length(); i++) { - if (array.IsNull(i)) { - RETURN_NOT_OK(AppendNull()); - } else { - RETURN_NOT_OK(Append(numeric_array.Value(i))); - } - } - return Status::OK(); -} - -Status DictionaryBuilder::AppendArray(const Array& array) { - for (int64_t i = 0; i < array.length(); i++) { - RETURN_NOT_OK(AppendNull()); - } - return Status::OK(); -} - -template -Status DictionaryBuilder::FinishInternal(std::shared_ptr* out) { - // Finalize indices array - RETURN_NOT_OK(values_builder_.FinishInternal(out)); - - // Generate dictionary array from hash table contents - std::shared_ptr dictionary; - std::shared_ptr dictionary_data; - - RETURN_NOT_OK(internal::DictionaryTraits::GetDictionaryArrayData( - pool_, type_, *memo_table_, delta_offset_, &dictionary_data)); - dictionary = MakeArray(dictionary_data); - - // Set type of array data to the right dictionary type - (*out)->type = std::make_shared((*out)->type, dictionary); - - // Update internals for further uses of this DictionaryBuilder - delta_offset_ = memo_table_->size(); - values_builder_.Reset(); - - return Status::OK(); -} - -Status DictionaryBuilder::FinishInternal(std::shared_ptr* out) { - std::shared_ptr dictionary = std::make_shared(0); - - RETURN_NOT_OK(values_builder_.FinishInternal(out)); - (*out)->type = std::make_shared((*out)->type, dictionary); - - return Status::OK(); -} - -// -// StringType and BinaryType specializations -// - -#define 
BINARY_DICTIONARY_SPECIALIZATIONS(Type) \ - \ - template <> \ - Status DictionaryBuilder::AppendArray(const Array& array) { \ - using ArrayType = typename TypeTraits::ArrayType; \ - const ArrayType& binary_array = checked_cast(array); \ - for (int64_t i = 0; i < array.length(); i++) { \ - if (array.IsNull(i)) { \ - RETURN_NOT_OK(AppendNull()); \ - } else { \ - RETURN_NOT_OK(Append(binary_array.GetView(i))); \ - } \ - } \ - return Status::OK(); \ - } - -BINARY_DICTIONARY_SPECIALIZATIONS(StringType); -BINARY_DICTIONARY_SPECIALIZATIONS(BinaryType); - -template <> -Status DictionaryBuilder::AppendArray(const Array& array) { - if (!type_->Equals(*array.type())) { - return Status::Invalid("Cannot append FixedSizeBinary array with non-matching type"); - } - - const auto& typed_array = checked_cast(array); - for (int64_t i = 0; i < array.length(); i++) { - if (array.IsNull(i)) { - RETURN_NOT_OK(AppendNull()); - } else { - RETURN_NOT_OK(Append(typed_array.GetValue(i))); - } - } - return Status::OK(); -} - -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; - -// ---------------------------------------------------------------------- -// Decimal128Builder - -Decimal128Builder::Decimal128Builder(const std::shared_ptr& type, - MemoryPool* pool) - : FixedSizeBinaryBuilder(type, pool) {} - -Status Decimal128Builder::Append(const Decimal128& value) { - RETURN_NOT_OK(FixedSizeBinaryBuilder::Reserve(1)); - return FixedSizeBinaryBuilder::Append(value.ToBytes()); -} - -Status Decimal128Builder::FinishInternal(std::shared_ptr* out) { - std::shared_ptr data; - RETURN_NOT_OK(byte_builder_.Finish(&data)); - - *out = ArrayData::Make(type_, length_, {null_bitmap_, data}, null_count_); - - return Status::OK(); -} - -// ---------------------------------------------------------------------- -// ListBuilder - -ListBuilder::ListBuilder(MemoryPool* pool, - std::shared_ptr const& value_builder, - const std::shared_ptr& type) - : ArrayBuilder(type ? 
type
-                        : std::static_pointer_cast<DataType>(
-                              std::make_shared<ListType>(value_builder->type())),
-                   pool),
-      offsets_builder_(pool),
-      value_builder_(value_builder) {}
-
-Status ListBuilder::AppendValues(const int32_t* offsets, int64_t length,
-                                 const uint8_t* valid_bytes) {
-  RETURN_NOT_OK(Reserve(length));
-  UnsafeAppendToBitmap(valid_bytes, length);
-  offsets_builder_.UnsafeAppend(offsets, length);
-  return Status::OK();
-}
-
-Status ListBuilder::AppendNextOffset() {
-  int64_t num_values = value_builder_->length();
-  if (ARROW_PREDICT_FALSE(num_values > kListMaximumElements)) {
-    std::stringstream ss;
-    ss << "ListArray cannot contain more than INT32_MAX - 1 child elements,"
-       << " have " << num_values;
-    return Status::CapacityError(ss.str());
-  }
-  return offsets_builder_.Append(static_cast<int32_t>(num_values));
-}
-
-Status ListBuilder::Append(bool is_valid) {
-  RETURN_NOT_OK(Reserve(1));
-  UnsafeAppendToBitmap(is_valid);
-  return AppendNextOffset();
-}
-
-Status ListBuilder::Resize(int64_t capacity) {
-  DCHECK_LE(capacity, kListMaximumElements);
-  RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
-
-  // one more than requested for offsets
-  RETURN_NOT_OK(offsets_builder_.Resize((capacity + 1) * sizeof(int32_t)));
-  return ArrayBuilder::Resize(capacity);
-}
-
-Status ListBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
-  RETURN_NOT_OK(AppendNextOffset());
-
-  // Offset padding zeroed by BufferBuilder
-  std::shared_ptr<Buffer> offsets;
-  RETURN_NOT_OK(offsets_builder_.Finish(&offsets));
-
-  std::shared_ptr<ArrayData> items;
-  if (values_) {
-    items = values_->data();
-  } else {
-    if (value_builder_->length() == 0) {
-      // Try to make sure we get a non-null values buffer (ARROW-2744)
-      RETURN_NOT_OK(value_builder_->Resize(0));
-    }
-    RETURN_NOT_OK(value_builder_->FinishInternal(&items));
-  }
-
-  *out = ArrayData::Make(type_, length_, {null_bitmap_, offsets}, null_count_);
-  (*out)->child_data.emplace_back(std::move(items));
-  Reset();
-  return Status::OK();
-}
-
-void ListBuilder::Reset() {
-  ArrayBuilder::Reset();
-  values_.reset();
-  offsets_builder_.Reset();
-  value_builder_->Reset();
-}
-
-ArrayBuilder* ListBuilder::value_builder() const {
-  DCHECK(!values_) << "Using value builder is pointless when values_ is set";
-  return value_builder_.get();
-}
-
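ListBuilder above and BinaryBuilder below share one encoding: element i spans [offsets[i], offsets[i+1]) in the child (or value-data) buffer, with a single extra closing offset appended at Finish time, so an empty element is simply two equal consecutive offsets. A standalone sketch of that bookkeeping, not the Arrow API:

```cpp
#include <cstdint>
#include <initializer_list>
#include <vector>

int main() {
  std::vector<int32_t> child;    // flattened child values of all lists
  std::vector<int32_t> offsets;  // one per list, plus one closing entry

  // Mirrors ListBuilder::Append(): record the child length so far, then
  // append the element's values to the child builder.
  auto append_list = [&](std::initializer_list<int32_t> values) {
    offsets.push_back(static_cast<int32_t>(child.size()));
    child.insert(child.end(), values);
  };

  append_list({0, 1});
  append_list({});  // empty list: offsets[1] == offsets[2]
  append_list({2, 3, 4});
  offsets.push_back(static_cast<int32_t>(child.size()));  // FinishInternal's final offset

  // offsets == {0, 2, 2, 5}; list i spans [offsets[i], offsets[i+1])
  return 0;
}
```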
-// ----------------------------------------------------------------------
-// String and binary
-
-BinaryBuilder::BinaryBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
-    : ArrayBuilder(type, pool), offsets_builder_(pool), value_data_builder_(pool) {}
-
-BinaryBuilder::BinaryBuilder(MemoryPool* pool) : BinaryBuilder(binary(), pool) {}
-
-Status BinaryBuilder::Resize(int64_t capacity) {
-  DCHECK_LE(capacity, kListMaximumElements);
-  RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
-
-  // one more than requested for offsets
-  RETURN_NOT_OK(offsets_builder_.Resize((capacity + 1) * sizeof(int32_t)));
-  return ArrayBuilder::Resize(capacity);
-}
-
-Status BinaryBuilder::ReserveData(int64_t elements) {
-  if (value_data_length() + elements > value_data_capacity()) {
-    if (value_data_length() + elements > kBinaryMemoryLimit) {
-      return Status::CapacityError(
-          "Cannot reserve capacity larger than 2^31 - 1 for binary");
-    }
-    RETURN_NOT_OK(value_data_builder_.Reserve(elements));
-  }
-  return Status::OK();
-}
-
-Status BinaryBuilder::AppendNextOffset() {
-  const int64_t num_bytes = value_data_builder_.length();
-  if (ARROW_PREDICT_FALSE(num_bytes > kBinaryMemoryLimit)) {
-    std::stringstream ss;
-    ss << "BinaryArray cannot contain more than " << kBinaryMemoryLimit << " bytes, have "
-       << num_bytes;
-    return Status::CapacityError(ss.str());
-  }
-  return offsets_builder_.Append(static_cast<int32_t>(num_bytes));
-}
-
-Status BinaryBuilder::Append(const uint8_t* value, int32_t length) {
-  RETURN_NOT_OK(Reserve(1));
-  RETURN_NOT_OK(AppendNextOffset());
-  RETURN_NOT_OK(value_data_builder_.Append(value, length));
-
-  UnsafeAppendToBitmap(true);
-  return Status::OK();
-}
-
-Status BinaryBuilder::AppendNull() {
-  RETURN_NOT_OK(AppendNextOffset());
-  RETURN_NOT_OK(Reserve(1));
-
-  UnsafeAppendToBitmap(false);
-  return Status::OK();
-}
-
-Status BinaryBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
-  // Write final offset (values length)
-  RETURN_NOT_OK(AppendNextOffset());
-
-  // These buffers' padding zeroed by BufferBuilder
-  std::shared_ptr<Buffer> offsets, value_data;
-  RETURN_NOT_OK(offsets_builder_.Finish(&offsets));
-  RETURN_NOT_OK(value_data_builder_.Finish(&value_data));
-
-  *out = ArrayData::Make(type_, length_, {null_bitmap_, offsets, value_data}, null_count_,
-                         0);
-  Reset();
-  return Status::OK();
-}
-
-void BinaryBuilder::Reset() {
-  ArrayBuilder::Reset();
-  offsets_builder_.Reset();
-  value_data_builder_.Reset();
-}
-
-const uint8_t* BinaryBuilder::GetValue(int64_t i, int32_t* out_length) const {
-  const int32_t* offsets = offsets_builder_.data();
-  int32_t offset = offsets[i];
-  if (i == (length_ - 1)) {
-    *out_length = static_cast<int32_t>(value_data_builder_.length()) - offset;
-  } else {
-    *out_length = offsets[i + 1] - offset;
-  }
-  return value_data_builder_.data() + offset;
-}
-
-util::string_view BinaryBuilder::GetView(int64_t i) const {
-  const int32_t* offsets = offsets_builder_.data();
-  int32_t offset = offsets[i];
-  int32_t value_length;
-  if (i == (length_ - 1)) {
-    value_length = static_cast<int32_t>(value_data_builder_.length()) - offset;
-  } else {
-    value_length = offsets[i + 1] - offset;
-  }
-  return util::string_view(
-      reinterpret_cast<const char*>(value_data_builder_.data() + offset), value_length);
-}
-
-StringBuilder::StringBuilder(MemoryPool* pool) : BinaryBuilder(utf8(), pool) {}
-
-Status StringBuilder::AppendValues(const std::vector<std::string>& values,
-                                   const uint8_t* valid_bytes) {
-  std::size_t total_length = std::accumulate(
-      values.begin(), values.end(), 0ULL,
-      [](uint64_t sum, const std::string& str) { return sum + str.size(); });
-  RETURN_NOT_OK(Reserve(values.size()));
-  RETURN_NOT_OK(value_data_builder_.Reserve(total_length));
-  RETURN_NOT_OK(offsets_builder_.Reserve(values.size()));
-
-  if (valid_bytes) {
-    for (std::size_t i = 0; i < values.size(); ++i) {
-      RETURN_NOT_OK(AppendNextOffset());
-      if (valid_bytes[i]) {
-        RETURN_NOT_OK(value_data_builder_.Append(
-            reinterpret_cast<const uint8_t*>(values[i].data()), values[i].size()));
-      }
-    }
-  } else {
-    for (std::size_t i = 0; i < values.size(); ++i) {
-      RETURN_NOT_OK(AppendNextOffset());
-      RETURN_NOT_OK(value_data_builder_.Append(
-          reinterpret_cast<const uint8_t*>(values[i].data()), values[i].size()));
-    }
-  }
-
-  UnsafeAppendToBitmap(valid_bytes, values.size());
-  return Status::OK();
-}
-
-Status StringBuilder::AppendValues(const char** values, int64_t length,
-                                   const uint8_t* valid_bytes) {
-  std::size_t total_length = 0;
-  std::vector<std::size_t> value_lengths(length);
-  bool have_null_value = false;
-  for (int64_t i = 0; i < length; ++i) {
-    if (values[i]) {
-      auto value_length = strlen(values[i]);
-      value_lengths[i] = value_length;
-      total_length += value_length;
-    } else {
-      have_null_value = true;
-    }
-  }
-  RETURN_NOT_OK(Reserve(length));
-  RETURN_NOT_OK(value_data_builder_.Reserve(total_length));
-  RETURN_NOT_OK(offsets_builder_.Reserve(length));
-
-  if (valid_bytes)
{ - int64_t valid_bytes_offset = 0; - for (int64_t i = 0; i < length; ++i) { - RETURN_NOT_OK(AppendNextOffset()); - if (valid_bytes[i]) { - if (values[i]) { - RETURN_NOT_OK(value_data_builder_.Append( - reinterpret_cast(values[i]), value_lengths[i])); - } else { - UnsafeAppendToBitmap(valid_bytes + valid_bytes_offset, i - valid_bytes_offset); - UnsafeAppendToBitmap(false); - valid_bytes_offset = i + 1; - } - } - } - UnsafeAppendToBitmap(valid_bytes + valid_bytes_offset, length - valid_bytes_offset); - } else { - if (have_null_value) { - std::vector valid_vector(length, 0); - for (int64_t i = 0; i < length; ++i) { - RETURN_NOT_OK(AppendNextOffset()); - if (values[i]) { - RETURN_NOT_OK(value_data_builder_.Append( - reinterpret_cast(values[i]), value_lengths[i])); - valid_vector[i] = 1; - } - } - UnsafeAppendToBitmap(valid_vector.data(), length); - } else { - for (int64_t i = 0; i < length; ++i) { - RETURN_NOT_OK(AppendNextOffset()); - RETURN_NOT_OK(value_data_builder_.Append( - reinterpret_cast(values[i]), value_lengths[i])); - } - UnsafeAppendToBitmap(nullptr, length); - } - } - return Status::OK(); -} - -// ---------------------------------------------------------------------- -// Fixed width binary - -FixedSizeBinaryBuilder::FixedSizeBinaryBuilder(const std::shared_ptr& type, - MemoryPool* pool) - : ArrayBuilder(type, pool), - byte_width_(checked_cast(*type).byte_width()), - byte_builder_(pool) {} - -#ifndef NDEBUG -void FixedSizeBinaryBuilder::CheckValueSize(int64_t size) { - DCHECK_EQ(size, byte_width_) << "Appending wrong size to FixedSizeBinaryBuilder"; -} -#endif - -Status FixedSizeBinaryBuilder::AppendValues(const uint8_t* data, int64_t length, - const uint8_t* valid_bytes) { - RETURN_NOT_OK(Reserve(length)); - UnsafeAppendToBitmap(valid_bytes, length); - return byte_builder_.Append(data, length * byte_width_); -} - -Status FixedSizeBinaryBuilder::AppendNull() { - RETURN_NOT_OK(Reserve(1)); - UnsafeAppendToBitmap(false); - return byte_builder_.Advance(byte_width_); -} - -void FixedSizeBinaryBuilder::Reset() { - ArrayBuilder::Reset(); - byte_builder_.Reset(); -} - -Status FixedSizeBinaryBuilder::Resize(int64_t capacity) { - RETURN_NOT_OK(CheckCapacity(capacity, capacity_)); - RETURN_NOT_OK(byte_builder_.Resize(capacity * byte_width_)); - return ArrayBuilder::Resize(capacity); -} - -Status FixedSizeBinaryBuilder::FinishInternal(std::shared_ptr* out) { - std::shared_ptr data; - RETURN_NOT_OK(byte_builder_.Finish(&data)); - - *out = ArrayData::Make(type_, length_, {null_bitmap_, data}, null_count_); - - null_bitmap_ = nullptr; - capacity_ = length_ = null_count_ = 0; - return Status::OK(); -} - -const uint8_t* FixedSizeBinaryBuilder::GetValue(int64_t i) const { - const uint8_t* data_ptr = byte_builder_.data(); - return data_ptr + i * byte_width_; -} - -util::string_view FixedSizeBinaryBuilder::GetView(int64_t i) const { - const uint8_t* data_ptr = byte_builder_.data(); - return util::string_view(reinterpret_cast(data_ptr + i * byte_width_), - byte_width_); -} - -// ---------------------------------------------------------------------- -// Struct - -StructBuilder::StructBuilder(const std::shared_ptr& type, MemoryPool* pool, - std::vector>&& field_builders) - : ArrayBuilder(type, pool), field_builders_(std::move(field_builders)) {} - -void StructBuilder::Reset() { - ArrayBuilder::Reset(); - for (const auto& field_builder : field_builders_) { - field_builder->Reset(); - } -} -Status StructBuilder::FinishInternal(std::shared_ptr* out) { - 
RETURN_NOT_OK(TrimBuffer(BitUtil::BytesForBits(length_), null_bitmap_.get())); - *out = ArrayData::Make(type_, length_, {null_bitmap_}, null_count_); - - (*out)->child_data.resize(field_builders_.size()); - for (size_t i = 0; i < field_builders_.size(); ++i) { - if (length_ == 0) { - // Try to make sure the child buffers are initialized - RETURN_NOT_OK(field_builders_[i]->Resize(0)); - } - RETURN_NOT_OK(field_builders_[i]->FinishInternal(&(*out)->child_data[i])); - } - - null_bitmap_ = nullptr; - capacity_ = length_ = null_count_ = 0; - return Status::OK(); -} +class MemoryPool; // ---------------------------------------------------------------------- // Helper functions @@ -1413,7 +73,7 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr& type, case Type::LIST: { std::unique_ptr value_builder; std::shared_ptr value_type = - checked_cast(*type).value_type(); + internal::checked_cast(*type).value_type(); RETURN_NOT_OK(MakeBuilder(pool, value_type, &value_builder)); out->reset(new ListBuilder(pool, std::move(value_builder))); return Status::OK(); @@ -1433,9 +93,8 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr& type, } default: { - std::stringstream ss; - ss << "MakeBuilder: cannot construct builder for type " << type->ToString(); - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("MakeBuilder: cannot construct builder for type ", + type->ToString()); } } } diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h index 34cac55202cc2..a7ab22c1beedb 100644 --- a/cpp/src/arrow/builder.h +++ b/cpp/src/arrow/builder.h @@ -15,1177 +15,27 @@ // specific language governing permissions and limitations // under the License. -#ifndef ARROW_BUILDER_H -#define ARROW_BUILDER_H +#pragma once -#include // IWYU pragma: keep -#include -#include -#include -#include -#include #include -#include -#include -#include -#include "arrow/buffer.h" -#include "arrow/memory_pool.h" +#include "arrow/array/builder_adaptive.h" // IWYU pragma: export +#include "arrow/array/builder_base.h" // IWYU pragma: export +#include "arrow/array/builder_binary.h" // IWYU pragma: export +#include "arrow/array/builder_decimal.h" // IWYU pragma: export +#include "arrow/array/builder_dict.h" // IWYU pragma: export +#include "arrow/array/builder_nested.h" // IWYU pragma: export +#include "arrow/array/builder_primitive.h" // IWYU pragma: export #include "arrow/status.h" -#include "arrow/type.h" -#include "arrow/type_traits.h" -#include "arrow/util/bit-util.h" -#include "arrow/util/macros.h" -#include "arrow/util/string_view.h" -#include "arrow/util/type_traits.h" #include "arrow/util/visibility.h" namespace arrow { -class Array; -struct ArrayData; -class Decimal128; - -constexpr int64_t kBinaryMemoryLimit = std::numeric_limits::max() - 1; -constexpr int64_t kListMaximumElements = std::numeric_limits::max() - 1; - -constexpr int64_t kMinBuilderCapacity = 1 << 5; - -/// Base class for all data array builders. -/// -/// This class provides a facilities for incrementally building the null bitmap -/// (see Append methods) and as a side effect the current number of slots and -/// the null count. -/// -/// \note Users are expected to use builders as one of the concrete types below. -/// For example, ArrayBuilder* pointing to BinaryBuilder should be downcast before use. 
-class ARROW_EXPORT ArrayBuilder { - public: - explicit ArrayBuilder(const std::shared_ptr& type, MemoryPool* pool) - : type_(type), - pool_(pool), - null_bitmap_(NULLPTR), - null_count_(0), - null_bitmap_data_(NULLPTR), - length_(0), - capacity_(0) {} - - virtual ~ArrayBuilder() = default; - - /// For nested types. Since the objects are owned by this class instance, we - /// skip shared pointers and just return a raw pointer - ArrayBuilder* child(int i) { return children_[i].get(); } - - int num_children() const { return static_cast(children_.size()); } - - int64_t length() const { return length_; } - int64_t null_count() const { return null_count_; } - int64_t capacity() const { return capacity_; } - - /// \brief Ensure that enough memory has been allocated to fit the indicated - /// number of total elements in the builder, including any that have already - /// been appended. Does not account for reallocations that may be due to - /// variable size data, like binary values. To make space for incremental - /// appends, use Reserve instead. - /// - /// \param[in] capacity the minimum number of total array values to - /// accommodate. Must be greater than the current capacity. - /// \return Status - virtual Status Resize(int64_t capacity); - - /// \brief Ensure that there is enough space allocated to add the indicated - /// number of elements without any further calls to Resize. The memory - /// allocated is rounded up to the next highest power of 2 similar to memory - /// allocations in STL containers like std::vector - /// \param[in] additional_capacity the number of additional array values - /// \return Status - Status Reserve(int64_t additional_capacity); - - /// Reset the builder. - virtual void Reset(); - - /// For cases where raw data was memcpy'd into the internal buffers, allows us - /// to advance the length of the builder. It is your responsibility to use - /// this function responsibly. - Status Advance(int64_t elements); - - /// \brief Return result of builder as an internal generic ArrayData - /// object. Resets builder except for dictionary builder - /// - /// \param[out] out the finalized ArrayData object - /// \return Status - virtual Status FinishInternal(std::shared_ptr* out) = 0; - - /// \brief Return result of builder as an Array object. - /// Resets the builder except for DictionaryBuilder - /// - /// \param[out] out the finalized Array object - /// \return Status - Status Finish(std::shared_ptr* out); - - std::shared_ptr type() const { return type_; } - - protected: - ArrayBuilder() {} - - /// Append to null bitmap - Status AppendToBitmap(bool is_valid); - - /// Vector append. Treat each zero byte as a null. If valid_bytes is null - /// assume all of length bits are valid. - Status AppendToBitmap(const uint8_t* valid_bytes, int64_t length); - - /// Set the next length bits to not null (i.e. valid). 
- Status SetNotNull(int64_t length); - - // Unsafe operations (don't check capacity/don't resize) - - void UnsafeAppendNull() { UnsafeAppendToBitmap(false); } - - // Append to null bitmap, update the length - void UnsafeAppendToBitmap(bool is_valid) { - if (is_valid) { - BitUtil::SetBit(null_bitmap_data_, length_); - } else { - ++null_count_; - } - ++length_; - } - - template - void UnsafeAppendToBitmap(const IterType& begin, const IterType& end) { - int64_t byte_offset = length_ / 8; - int64_t bit_offset = length_ % 8; - uint8_t bitset = null_bitmap_data_[byte_offset]; - - for (auto iter = begin; iter != end; ++iter) { - if (bit_offset == 8) { - bit_offset = 0; - null_bitmap_data_[byte_offset] = bitset; - byte_offset++; - // TODO: Except for the last byte, this shouldn't be needed - bitset = null_bitmap_data_[byte_offset]; - } - - if (*iter) { - bitset |= BitUtil::kBitmask[bit_offset]; - } else { - bitset &= BitUtil::kFlippedBitmask[bit_offset]; - ++null_count_; - } - - bit_offset++; - } - - if (bit_offset != 0) { - null_bitmap_data_[byte_offset] = bitset; - } - - length_ += std::distance(begin, end); - } - - // Vector append. Treat each zero byte as a nullzero. If valid_bytes is null - // assume all of length bits are valid. - void UnsafeAppendToBitmap(const uint8_t* valid_bytes, int64_t length); - - void UnsafeAppendToBitmap(const std::vector& is_valid); - - // Set the next length bits to not null (i.e. valid). - void UnsafeSetNotNull(int64_t length); - - std::shared_ptr type_; - MemoryPool* pool_; - - // When null_bitmap are first appended to the builder, the null bitmap is allocated - std::shared_ptr null_bitmap_; - int64_t null_count_; - uint8_t* null_bitmap_data_; - - // Array length, so far. Also, the index of the next element to be added - int64_t length_; - int64_t capacity_; - - // Child value array builders. 
These are owned by this class - std::vector> children_; - - private: - ARROW_DISALLOW_COPY_AND_ASSIGN(ArrayBuilder); -}; - -class ARROW_EXPORT NullBuilder : public ArrayBuilder { - public: - explicit NullBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT) - : ArrayBuilder(null(), pool) {} - - Status AppendNull() { - ++null_count_; - ++length_; - return Status::OK(); - } - - Status FinishInternal(std::shared_ptr* out) override; -}; - -template -class ARROW_EXPORT PrimitiveBuilder : public ArrayBuilder { - public: - using value_type = typename Type::c_type; - - explicit PrimitiveBuilder(const std::shared_ptr& type, MemoryPool* pool) - : ArrayBuilder(type, pool), data_(NULLPTR), raw_data_(NULLPTR) {} - - using ArrayBuilder::Advance; - - /// Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory - /// The memory at the corresponding data slot is set to 0 to prevent uninitialized - /// memory access - Status AppendNulls(const uint8_t* valid_bytes, int64_t length) { - ARROW_RETURN_NOT_OK(Reserve(length)); - memset(raw_data_ + length_, 0, - static_cast(TypeTraits::bytes_required(length))); - UnsafeAppendToBitmap(valid_bytes, length); - return Status::OK(); - } - - Status AppendNull() { - ARROW_RETURN_NOT_OK(Reserve(1)); - memset(raw_data_ + length_, 0, sizeof(value_type)); - UnsafeAppendToBitmap(false); - return Status::OK(); - } - - value_type GetValue(int64_t index) const { - return reinterpret_cast(data_->data())[index]; - } - - /// \brief Append a sequence of elements in one shot - /// \param[in] values a contiguous C array of values - /// \param[in] length the number of values to append - /// \param[in] valid_bytes an optional sequence of bytes where non-zero - /// indicates a valid (non-null) value - /// \return Status - Status AppendValues(const value_type* values, int64_t length, - const uint8_t* valid_bytes = NULLPTR); - - /// \brief Append a sequence of elements in one shot - /// \param[in] values a contiguous C array of values - /// \param[in] length the number of values to append - /// \param[in] is_valid an std::vector indicating valid (1) or null - /// (0). Equal in length to values - /// \return Status - Status AppendValues(const value_type* values, int64_t length, - const std::vector& is_valid); - - /// \brief Append a sequence of elements in one shot - /// \param[in] values a std::vector of values - /// \param[in] is_valid an std::vector indicating valid (1) or null - /// (0). 
Equal in length to values - /// \return Status - Status AppendValues(const std::vector& values, - const std::vector& is_valid); - - /// \brief Append a sequence of elements in one shot - /// \param[in] values a std::vector of values - /// \return Status - Status AppendValues(const std::vector& values); - - /// \brief Append a sequence of elements in one shot - /// \param[in] values_begin InputIterator to the beginning of the values - /// \param[in] values_end InputIterator pointing to the end of the values - /// \return Status - - template - Status AppendValues(ValuesIter values_begin, ValuesIter values_end) { - int64_t length = static_cast(std::distance(values_begin, values_end)); - ARROW_RETURN_NOT_OK(Reserve(length)); - - std::copy(values_begin, values_end, raw_data_ + length_); - - // this updates the length_ - UnsafeSetNotNull(length); - return Status::OK(); - } - - /// \brief Append a sequence of elements in one shot, with a specified nullmap - /// \param[in] values_begin InputIterator to the beginning of the values - /// \param[in] values_end InputIterator pointing to the end of the values - /// \param[in] valid_begin InputIterator with elements indication valid(1) - /// or null(0) values. - /// \return Status - template - typename std::enable_if::value, Status>::type AppendValues( - ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) { - static_assert(!internal::is_null_pointer::value, - "Don't pass a NULLPTR directly as valid_begin, use the 2-argument " - "version instead"); - int64_t length = static_cast(std::distance(values_begin, values_end)); - ARROW_RETURN_NOT_OK(Reserve(length)); - - std::copy(values_begin, values_end, raw_data_ + length_); - - // this updates the length_ - UnsafeAppendToBitmap(valid_begin, std::next(valid_begin, length)); - return Status::OK(); - } - - /// \brief Append a sequence of elements in one shot, with a specified nullmap - /// \param[in] values_begin InputIterator to the beginning of the values - /// \param[in] values_end InputIterator pointing to the end of the values - /// \param[in] valid_begin uint8_t* indication valid(1) or null(0) values. - /// nullptr indicates all values are valid. - /// \return Status - template - typename std::enable_if::value, Status>::type AppendValues( - ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) { - int64_t length = static_cast(std::distance(values_begin, values_end)); - ARROW_RETURN_NOT_OK(Reserve(length)); - - std::copy(values_begin, values_end, raw_data_ + length_); - - // this updates the length_ - if (valid_begin == NULLPTR) { - UnsafeSetNotNull(length); - } else { - UnsafeAppendToBitmap(valid_begin, std::next(valid_begin, length)); - } - - return Status::OK(); - } - - Status FinishInternal(std::shared_ptr* out) override; - void Reset() override; - - Status Resize(int64_t capacity) override; - - protected: - std::shared_ptr data_; - value_type* raw_data_; -}; - -/// Base class for all Builders that emit an Array of a scalar numerical type. 
-template -class ARROW_EXPORT NumericBuilder : public PrimitiveBuilder { - public: - using typename PrimitiveBuilder::value_type; - using PrimitiveBuilder::PrimitiveBuilder; - - template - explicit NumericBuilder( - typename std::enable_if::is_parameter_free, MemoryPool*>::type pool - ARROW_MEMORY_POOL_DEFAULT) - : PrimitiveBuilder(TypeTraits::type_singleton(), pool) {} - - using ArrayBuilder::UnsafeAppendNull; - using PrimitiveBuilder::AppendValues; - using PrimitiveBuilder::Resize; - using PrimitiveBuilder::Reserve; - - /// Append a single scalar and increase the size if necessary. - Status Append(const value_type val) { - ARROW_RETURN_NOT_OK(ArrayBuilder::Reserve(1)); - UnsafeAppend(val); - return Status::OK(); - } - - /// Append a single scalar under the assumption that the underlying Buffer is - /// large enough. - /// - /// This method does not capacity-check; make sure to call Reserve - /// beforehand. - void UnsafeAppend(const value_type val) { - BitUtil::SetBit(null_bitmap_data_, length_); - raw_data_[length_++] = val; - } - - protected: - using PrimitiveBuilder::length_; - using PrimitiveBuilder::null_bitmap_data_; - using PrimitiveBuilder::raw_data_; -}; - -// Builders - -using UInt8Builder = NumericBuilder; -using UInt16Builder = NumericBuilder; -using UInt32Builder = NumericBuilder; -using UInt64Builder = NumericBuilder; - -using Int8Builder = NumericBuilder; -using Int16Builder = NumericBuilder; -using Int32Builder = NumericBuilder; -using Int64Builder = NumericBuilder; -using TimestampBuilder = NumericBuilder; -using Time32Builder = NumericBuilder; -using Time64Builder = NumericBuilder; -using Date32Builder = NumericBuilder; -using Date64Builder = NumericBuilder; - -using HalfFloatBuilder = NumericBuilder; -using FloatBuilder = NumericBuilder; -using DoubleBuilder = NumericBuilder; - -namespace internal { - -class ARROW_EXPORT AdaptiveIntBuilderBase : public ArrayBuilder { - public: - explicit AdaptiveIntBuilderBase(MemoryPool* pool); - - /// Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory - Status AppendNulls(const uint8_t* valid_bytes, int64_t length) { - ARROW_RETURN_NOT_OK(CommitPendingData()); - ARROW_RETURN_NOT_OK(Reserve(length)); - memset(data_->mutable_data() + length_ * int_size_, 0, int_size_ * length); - UnsafeAppendToBitmap(valid_bytes, length); - return Status::OK(); - } - - Status AppendNull() { - pending_data_[pending_pos_] = 0; - pending_valid_[pending_pos_] = 0; - pending_has_nulls_ = true; - ++pending_pos_; - - if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) { - return CommitPendingData(); - } - return Status::OK(); - } - - void Reset() override; - Status Resize(int64_t capacity) override; - - protected: - virtual Status CommitPendingData() = 0; - - std::shared_ptr data_; - uint8_t* raw_data_; - uint8_t int_size_; - - static constexpr int32_t pending_size_ = 1024; - uint8_t pending_valid_[pending_size_]; - uint64_t pending_data_[pending_size_]; - int32_t pending_pos_; - bool pending_has_nulls_; -}; - -} // namespace internal - -class ARROW_EXPORT AdaptiveUIntBuilder : public internal::AdaptiveIntBuilderBase { - public: - explicit AdaptiveUIntBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT); - - using ArrayBuilder::Advance; - using internal::AdaptiveIntBuilderBase::Reset; - - /// Scalar append - Status Append(const uint64_t val) { - pending_data_[pending_pos_] = val; - pending_valid_[pending_pos_] = 1; - ++pending_pos_; - - if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) { - return CommitPendingData(); - } - 
return Status::OK(); - } - - /// \brief Append a sequence of elements in one shot - /// \param[in] values a contiguous C array of values - /// \param[in] length the number of values to append - /// \param[in] valid_bytes an optional sequence of bytes where non-zero - /// indicates a valid (non-null) value - /// \return Status - Status AppendValues(const uint64_t* values, int64_t length, - const uint8_t* valid_bytes = NULLPTR); - - Status FinishInternal(std::shared_ptr* out) override; - - protected: - Status CommitPendingData() override; - Status ExpandIntSize(uint8_t new_int_size); - - Status AppendValuesInternal(const uint64_t* values, int64_t length, - const uint8_t* valid_bytes); - - template - typename std::enable_if= sizeof(new_type), Status>::type - ExpandIntSizeInternal(); -#define __LESS(a, b) (a) < (b) - template - typename std::enable_if<__LESS(sizeof(old_type), sizeof(new_type)), Status>::type - ExpandIntSizeInternal(); -#undef __LESS - - template - Status ExpandIntSizeN(); -}; - -class ARROW_EXPORT AdaptiveIntBuilder : public internal::AdaptiveIntBuilderBase { - public: - explicit AdaptiveIntBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT); - - using ArrayBuilder::Advance; - using internal::AdaptiveIntBuilderBase::Reset; - - /// Scalar append - Status Append(const int64_t val) { - auto v = static_cast(val); - - pending_data_[pending_pos_] = v; - pending_valid_[pending_pos_] = 1; - ++pending_pos_; - - if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) { - return CommitPendingData(); - } - return Status::OK(); - } - - /// \brief Append a sequence of elements in one shot - /// \param[in] values a contiguous C array of values - /// \param[in] length the number of values to append - /// \param[in] valid_bytes an optional sequence of bytes where non-zero - /// indicates a valid (non-null) value - /// \return Status - Status AppendValues(const int64_t* values, int64_t length, - const uint8_t* valid_bytes = NULLPTR); - - Status FinishInternal(std::shared_ptr* out) override; - - protected: - Status CommitPendingData() override; - Status ExpandIntSize(uint8_t new_int_size); - - Status AppendValuesInternal(const int64_t* values, int64_t length, - const uint8_t* valid_bytes); - - template - typename std::enable_if= sizeof(new_type), Status>::type - ExpandIntSizeInternal(); -#define __LESS(a, b) (a) < (b) - template - typename std::enable_if<__LESS(sizeof(old_type), sizeof(new_type)), Status>::type - ExpandIntSizeInternal(); -#undef __LESS - - template - Status ExpandIntSizeN(); -}; - -class ARROW_EXPORT BooleanBuilder : public ArrayBuilder { - public: - using value_type = bool; - explicit BooleanBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT); - - explicit BooleanBuilder(const std::shared_ptr& type, MemoryPool* pool); - - using ArrayBuilder::Advance; - using ArrayBuilder::UnsafeAppendNull; - - /// Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory - Status AppendNulls(const uint8_t* valid_bytes, int64_t length) { - ARROW_RETURN_NOT_OK(Reserve(length)); - UnsafeAppendToBitmap(valid_bytes, length); - - return Status::OK(); - } - - Status AppendNull() { - ARROW_RETURN_NOT_OK(Reserve(1)); - UnsafeAppendToBitmap(false); - - return Status::OK(); - } - - /// Scalar append - Status Append(const bool val) { - ARROW_RETURN_NOT_OK(Reserve(1)); - UnsafeAppend(val); - return Status::OK(); - } - - Status Append(const uint8_t val) { return Append(val != 0); } - - /// Scalar append, without checking for capacity - void UnsafeAppend(const bool val) { - 
BitUtil::SetBit(null_bitmap_data_, length_); - if (val) { - BitUtil::SetBit(raw_data_, length_); - } else { - BitUtil::ClearBit(raw_data_, length_); - } - ++length_; - } - - void UnsafeAppend(const uint8_t val) { UnsafeAppend(val != 0); } - - /// \brief Append a sequence of elements in one shot - /// \param[in] values a contiguous array of bytes (non-zero is 1) - /// \param[in] length the number of values to append - /// \param[in] valid_bytes an optional sequence of bytes where non-zero - /// indicates a valid (non-null) value - /// \return Status - Status AppendValues(const uint8_t* values, int64_t length, - const uint8_t* valid_bytes = NULLPTR); - - /// \brief Append a sequence of elements in one shot - /// \param[in] values a contiguous C array of values - /// \param[in] length the number of values to append - /// \param[in] is_valid an std::vector indicating valid (1) or null - /// (0). Equal in length to values - /// \return Status - Status AppendValues(const uint8_t* values, int64_t length, - const std::vector& is_valid); - - /// \brief Append a sequence of elements in one shot - /// \param[in] values a std::vector of bytes - /// \param[in] is_valid an std::vector indicating valid (1) or null - /// (0). Equal in length to values - /// \return Status - Status AppendValues(const std::vector& values, - const std::vector& is_valid); - - /// \brief Append a sequence of elements in one shot - /// \param[in] values a std::vector of bytes - /// \return Status - Status AppendValues(const std::vector& values); - - /// \brief Append a sequence of elements in one shot - /// \param[in] values an std::vector indicating true (1) or false - /// \param[in] is_valid an std::vector indicating valid (1) or null - /// (0). Equal in length to values - /// \return Status - Status AppendValues(const std::vector& values, const std::vector& is_valid); - - /// \brief Append a sequence of elements in one shot - /// \param[in] values an std::vector indicating true (1) or false - /// \return Status - Status AppendValues(const std::vector& values); - - /// \brief Append a sequence of elements in one shot - /// \param[in] values_begin InputIterator to the beginning of the values - /// \param[in] values_end InputIterator pointing to the end of the values - /// or null(0) values - /// \return Status - template - Status AppendValues(ValuesIter values_begin, ValuesIter values_end) { - int64_t length = static_cast(std::distance(values_begin, values_end)); - ARROW_RETURN_NOT_OK(Reserve(length)); - auto iter = values_begin; - internal::GenerateBitsUnrolled(raw_data_, length_, length, - [&iter]() -> bool { return *(iter++); }); - - // this updates length_ - UnsafeSetNotNull(length); - return Status::OK(); - } - - /// \brief Append a sequence of elements in one shot, with a specified nullmap - /// \param[in] values_begin InputIterator to the beginning of the values - /// \param[in] values_end InputIterator pointing to the end of the values - /// \param[in] valid_begin InputIterator with elements indication valid(1) - /// or null(0) values - /// \return Status - template - typename std::enable_if::value, Status>::type AppendValues( - ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) { - static_assert(!internal::is_null_pointer::value, - "Don't pass a NULLPTR directly as valid_begin, use the 2-argument " - "version instead"); - int64_t length = static_cast(std::distance(values_begin, values_end)); - ARROW_RETURN_NOT_OK(Reserve(length)); - - auto iter = values_begin; - 
internal::GenerateBitsUnrolled(raw_data_, length_, length,
-                                   [&iter]() -> bool { return *(iter++); });
-
-    // this updates length_
-    ArrayBuilder::UnsafeAppendToBitmap(valid_begin, std::next(valid_begin, length));
-    return Status::OK();
-  }
-
-  /// \brief Append a sequence of elements in one shot, with a specified nullmap
-  /// \param[in] values_begin InputIterator to the beginning of the values
-  /// \param[in] values_end InputIterator pointing to the end of the values
-  /// \param[in] valid_begin uint8_t* indicating valid (1) or null (0) values.
-  /// nullptr indicates all values are valid.
-  /// \return Status
-  template <typename ValuesIter, typename ValidIter>
-  typename std::enable_if<std::is_pointer<ValidIter>::value, Status>::type AppendValues(
-      ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
-    int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
-    ARROW_RETURN_NOT_OK(Reserve(length));
-
-    auto iter = values_begin;
-    internal::GenerateBitsUnrolled(raw_data_, length_, length,
-                                   [&iter]() -> bool { return *(iter++); });
-
-    // this updates the length_
-    if (valid_begin == NULLPTR) {
-      UnsafeSetNotNull(length);
-    } else {
-      UnsafeAppendToBitmap(valid_begin, std::next(valid_begin, length));
-    }
-
-    return Status::OK();
-  }
-
-  Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
-  void Reset() override;
-  Status Resize(int64_t capacity) override;
-
- protected:
-  std::shared_ptr<ResizableBuffer> data_;
-  uint8_t* raw_data_;
-};
-
-// ----------------------------------------------------------------------
-// List builder
-
-/// \class ListBuilder
-/// \brief Builder class for variable-length list array value types
-///
-/// To use this class, you must append values to the child array builder and use
-/// the Append function to delimit each distinct list value (once the values
-/// have been appended to the child array) or use the bulk API to append
-/// a sequence of offsets and null values.
-///
-/// A note on types. Per arrow/type.h all types in the c++ implementation are
-/// logical so even though this class always builds list array, this can
-/// represent multiple different logical types. If no logical type is provided
-/// at construction time, the class defaults to List<T> where T is taken from the
-/// value_builder/values that the object is constructed with.
-class ARROW_EXPORT ListBuilder : public ArrayBuilder {
- public:
-  /// Use this constructor to incrementally build the value array along with offsets and
-  /// null bitmap.
- ListBuilder(MemoryPool* pool, std::shared_ptr const& value_builder, - const std::shared_ptr& type = NULLPTR); - - Status Resize(int64_t capacity) override; - void Reset() override; - Status FinishInternal(std::shared_ptr* out) override; - - /// \brief Vector append - /// - /// If passed, valid_bytes is of equal length to values, and any zero byte - /// will be considered as a null for that slot - Status AppendValues(const int32_t* offsets, int64_t length, - const uint8_t* valid_bytes = NULLPTR); - - /// \brief Start a new variable-length list slot - /// - /// This function should be called before beginning to append elements to the - /// value builder - Status Append(bool is_valid = true); - - Status AppendNull() { return Append(false); } - - ArrayBuilder* value_builder() const; - - protected: - TypedBufferBuilder offsets_builder_; - std::shared_ptr value_builder_; - std::shared_ptr values_; - - Status AppendNextOffset(); -}; - -// ---------------------------------------------------------------------- -// Binary and String - -/// \class BinaryBuilder -/// \brief Builder class for variable-length binary data -class ARROW_EXPORT BinaryBuilder : public ArrayBuilder { - public: - explicit BinaryBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT); - - BinaryBuilder(const std::shared_ptr& type, MemoryPool* pool); - - Status Append(const uint8_t* value, int32_t length); - - Status Append(const char* value, int32_t length) { - return Append(reinterpret_cast(value), length); - } - - Status Append(util::string_view value) { - return Append(value.data(), static_cast(value.size())); - } - - Status AppendNull(); - - /// \brief Append without checking capacity - /// - /// Offsets and data should have been presized using Reserve() and - /// ReserveData(), respectively. - void UnsafeAppend(const uint8_t* value, int32_t length) { - UnsafeAppendNextOffset(); - value_data_builder_.UnsafeAppend(value, length); - UnsafeAppendToBitmap(true); - } - - void UnsafeAppend(const char* value, int32_t length) { - UnsafeAppend(reinterpret_cast(value), length); - } - - void UnsafeAppend(const std::string& value) { - UnsafeAppend(value.c_str(), static_cast(value.size())); - } - - void UnsafeAppendNull() { - const int64_t num_bytes = value_data_builder_.length(); - offsets_builder_.UnsafeAppend(static_cast(num_bytes)); - UnsafeAppendToBitmap(false); - } - - void Reset() override; - Status Resize(int64_t capacity) override; - - /// \brief Ensures there is enough allocated capacity to append the indicated - /// number of bytes to the value data buffer without additional allocations - Status ReserveData(int64_t elements); - - Status FinishInternal(std::shared_ptr* out) override; - - /// \return size of values buffer so far - int64_t value_data_length() const { return value_data_builder_.length(); } - /// \return capacity of values buffer - int64_t value_data_capacity() const { return value_data_builder_.capacity(); } - - /// Temporary access to a value. - /// - /// This pointer becomes invalid on the next modifying operation. - const uint8_t* GetValue(int64_t i, int32_t* out_length) const; - - /// Temporary access to a value. - /// - /// This view becomes invalid on the next modifying operation. 
- util::string_view GetView(int64_t i) const; - - protected: - TypedBufferBuilder offsets_builder_; - TypedBufferBuilder value_data_builder_; - - Status AppendNextOffset(); - - void UnsafeAppendNextOffset() { - const int64_t num_bytes = value_data_builder_.length(); - offsets_builder_.UnsafeAppend(static_cast(num_bytes)); - } -}; - -/// \class StringBuilder -/// \brief Builder class for UTF8 strings -class ARROW_EXPORT StringBuilder : public BinaryBuilder { - public: - using BinaryBuilder::BinaryBuilder; - explicit StringBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT); - - using BinaryBuilder::Append; - using BinaryBuilder::Reset; - using BinaryBuilder::UnsafeAppend; - - /// \brief Append a sequence of strings in one shot. - /// - /// \param[in] values a vector of strings - /// \param[in] valid_bytes an optional sequence of bytes where non-zero - /// indicates a valid (non-null) value - /// \return Status - Status AppendValues(const std::vector& values, - const uint8_t* valid_bytes = NULLPTR); - - /// \brief Append a sequence of nul-terminated strings in one shot. - /// If one of the values is NULL, it is processed as a null - /// value even if the corresponding valid_bytes entry is 1. - /// - /// \param[in] values a contiguous C array of nul-terminated char * - /// \param[in] length the number of values to append - /// \param[in] valid_bytes an optional sequence of bytes where non-zero - /// indicates a valid (non-null) value - /// \return Status - Status AppendValues(const char** values, int64_t length, - const uint8_t* valid_bytes = NULLPTR); -}; - -// ---------------------------------------------------------------------- -// FixedSizeBinaryBuilder - -class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder { - public: - FixedSizeBinaryBuilder(const std::shared_ptr& type, - MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT); - - Status Append(const uint8_t* value) { - ARROW_RETURN_NOT_OK(Reserve(1)); - UnsafeAppendToBitmap(true); - return byte_builder_.Append(value, byte_width_); - } - - Status Append(const char* value) { - return Append(reinterpret_cast(value)); - } - - Status Append(const util::string_view& view) { -#ifndef NDEBUG - CheckValueSize(static_cast(view.size())); -#endif - return Append(reinterpret_cast(view.data())); - } - - Status Append(const std::string& s) { -#ifndef NDEBUG - CheckValueSize(static_cast(s.size())); -#endif - return Append(reinterpret_cast(s.data())); - } - - template - Status Append(const std::array& value) { - ARROW_RETURN_NOT_OK(Reserve(1)); - UnsafeAppendToBitmap(true); - return byte_builder_.Append(value); - } - - Status AppendValues(const uint8_t* data, int64_t length, - const uint8_t* valid_bytes = NULLPTR); - Status AppendNull(); - - void Reset() override; - Status Resize(int64_t capacity) override; - Status FinishInternal(std::shared_ptr* out) override; - - /// \return size of values buffer so far - int64_t value_data_length() const { return byte_builder_.length(); } - - int32_t byte_width() const { return byte_width_; } - - /// Temporary access to a value. - /// - /// This pointer becomes invalid on the next modifying operation. - const uint8_t* GetValue(int64_t i) const; - - /// Temporary access to a value. - /// - /// This view becomes invalid on the next modifying operation. 
-  util::string_view GetView(int64_t i) const;
-
- protected:
-  int32_t byte_width_;
-  BufferBuilder byte_builder_;
-
-#ifndef NDEBUG
-  void CheckValueSize(int64_t size);
-#endif
-};
-
-class ARROW_EXPORT Decimal128Builder : public FixedSizeBinaryBuilder {
- public:
-  explicit Decimal128Builder(const std::shared_ptr<DataType>& type,
-                             MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT);
-
-  using FixedSizeBinaryBuilder::Append;
-  using FixedSizeBinaryBuilder::AppendValues;
-  using FixedSizeBinaryBuilder::Reset;
-
-  Status Append(const Decimal128& val);
-
-  Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
-};
-
-using DecimalBuilder = Decimal128Builder;
-
-// ----------------------------------------------------------------------
-// Struct
-
-// ---------------------------------------------------------------------------------
-// StructArray builder
-/// The Append, Resize, and Reserve methods act on the StructBuilder itself.
-/// Make sure to call them and the corresponding methods of the child builders
-/// consistently to maintain data-structure consistency.
-class ARROW_EXPORT StructBuilder : public ArrayBuilder {
- public:
-  StructBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool,
-                std::vector<std::unique_ptr<ArrayBuilder>>&& field_builders);
-
-  Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
-
-  /// The null bitmap is of equal length to every child field; any zero byte is
-  /// considered a null for that field. Users must independently call the child
-  /// builders' append or advance methods to insert the actual data.
-  Status AppendValues(int64_t length, const uint8_t* valid_bytes) {
-    ARROW_RETURN_NOT_OK(Reserve(length));
-    UnsafeAppendToBitmap(valid_bytes, length);
-    return Status::OK();
-  }
-
-  /// Append an element to the Struct. All child-builders' Append method must
-  /// be called independently to maintain data-structure consistency.
-  Status Append(bool is_valid = true) {
-    ARROW_RETURN_NOT_OK(Reserve(1));
-    UnsafeAppendToBitmap(is_valid);
-    return Status::OK();
-  }
-
-  Status AppendNull() { return Append(false); }
-
-  void Reset() override;
-
-  ArrayBuilder* field_builder(int i) const { return field_builders_[i].get(); }
-
-  int num_fields() const { return static_cast<int>(field_builders_.size()); }
-
- protected:
-  std::vector<std::unique_ptr<ArrayBuilder>> field_builders_;
-};
-
-// ----------------------------------------------------------------------
-// Dictionary builder
-
-namespace internal {
-
-template <typename T>
-struct DictionaryScalar {
-  using type = typename T::c_type;
-};
-
-template <>
-struct DictionaryScalar<BinaryType> {
-  using type = util::string_view;
-};
-
-template <>
-struct DictionaryScalar<StringType> {
-  using type = util::string_view;
-};
-
-template <>
-struct DictionaryScalar<FixedSizeBinaryType> {
-  using type = util::string_view;
-};
-
-}  // namespace internal
-
-/// \brief Array builder for creating an encoded DictionaryArray from a dense array
-///
-/// Unlike other builders, dictionary builder does not completely reset the state
-/// on Finish calls. The arrays built after the initial Finish call will reuse
-/// the previously created encoding and build a delta dictionary when new terms
-/// occur.
-template <typename T>
-class ARROW_EXPORT DictionaryBuilder : public ArrayBuilder {
- public:
-  using Scalar = typename internal::DictionaryScalar<T>::type;
-
-  // WARNING: the type given below is the value type, not the DictionaryType.
-  // The DictionaryType is instantiated on the Finish() call.
-// ----------------------------------------------------------------------
-// Dictionary builder
-
-namespace internal {
-
-template <typename T>
-struct DictionaryScalar {
-  using type = typename T::c_type;
-};
-
-template <>
-struct DictionaryScalar<BinaryType> {
-  using type = util::string_view;
-};
-
-template <>
-struct DictionaryScalar<StringType> {
-  using type = util::string_view;
-};
-
-template <>
-struct DictionaryScalar<FixedSizeBinaryType> {
-  using type = util::string_view;
-};
-
-}  // namespace internal
-
-/// \brief Array builder for creating an encoded DictionaryArray from dense
-/// array data
-///
-/// Unlike other builders, the dictionary builder does not completely reset
-/// its state on Finish calls. The arrays built after the initial Finish call
-/// will reuse the previously created encoding and build a delta dictionary
-/// when new terms occur.
-template <typename T>
-class ARROW_EXPORT DictionaryBuilder : public ArrayBuilder {
- public:
-  using Scalar = typename internal::DictionaryScalar<T>::type;
-
-  // WARNING: the type given below is the value type, not the DictionaryType.
-  // The DictionaryType is instantiated on the Finish() call.
-  DictionaryBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool);
-
-  template <typename T1 = T>
-  explicit DictionaryBuilder(
-      typename std::enable_if<TypeTraits<T1>::is_parameter_free, MemoryPool*>::type pool)
-      : DictionaryBuilder<T1>(TypeTraits<T1>::type_singleton(), pool) {}
-
-  ~DictionaryBuilder() override;
-
-  /// \brief Append a scalar value
-  Status Append(const Scalar& value);
-
-  /// \brief Append a fixed-width string (only for FixedSizeBinaryType)
-  template <typename T1 = T>
-  Status Append(typename std::enable_if<std::is_base_of<FixedSizeBinaryType, T1>::value,
-                                        const uint8_t*>::type value) {
-    return Append(util::string_view(reinterpret_cast<const char*>(value), byte_width_));
-  }
-
-  /// \brief Append a fixed-width string (only for FixedSizeBinaryType)
-  template <typename T1 = T>
-  Status Append(typename std::enable_if<std::is_base_of<FixedSizeBinaryType, T1>::value,
-                                        const char*>::type value) {
-    return Append(util::string_view(value, byte_width_));
-  }
-
-  /// \brief Append a scalar null value
-  Status AppendNull();
-
-  /// \brief Append a whole dense array to the builder
-  Status AppendArray(const Array& array);
-
-  void Reset() override;
-  Status Resize(int64_t capacity) override;
-  Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
-
-  /// Whether the dictionary builder is in the delta building mode
-  bool is_building_delta() { return delta_offset_ > 0; }
-
- protected:
-  class MemoTableImpl;
-  std::unique_ptr<MemoTableImpl> memo_table_;
-
-  int32_t delta_offset_;
-  // Only used for FixedSizeBinaryType
-  int32_t byte_width_;
-
-  AdaptiveIntBuilder values_builder_;
-};
-
-template <>
-class ARROW_EXPORT DictionaryBuilder<NullType> : public ArrayBuilder {
- public:
-  DictionaryBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool);
-  explicit DictionaryBuilder(MemoryPool* pool);
-
-  /// \brief Append a scalar null value
-  Status AppendNull();
-
-  /// \brief Append a whole dense array to the builder
-  Status AppendArray(const Array& array);
-
-  Status Resize(int64_t capacity) override;
-  Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
-
- protected:
-  AdaptiveIntBuilder values_builder_;
-};
-
-class ARROW_EXPORT BinaryDictionaryBuilder : public DictionaryBuilder<BinaryType> {
- public:
-  using DictionaryBuilder::Append;
-  using DictionaryBuilder::DictionaryBuilder;
-
-  Status Append(const uint8_t* value, int32_t length) {
-    return Append(reinterpret_cast<const char*>(value), length);
-  }
-
-  Status Append(const char* value, int32_t length) {
-    return Append(util::string_view(value, length));
-  }
-};
-
-/// \brief Dictionary array builder with convenience methods for strings
-class ARROW_EXPORT StringDictionaryBuilder : public DictionaryBuilder<StringType> {
- public:
-  using DictionaryBuilder::Append;
-  using DictionaryBuilder::DictionaryBuilder;
-
-  Status Append(const uint8_t* value, int32_t length) {
-    return Append(reinterpret_cast<const char*>(value), length);
-  }
-
-  Status Append(const char* value, int32_t length) {
-    return Append(util::string_view(value, length));
-  }
-};
-
-// ----------------------------------------------------------------------
-// Helper functions
+class DataType;
+class MemoryPool;
 
 ARROW_EXPORT
 Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
                    std::unique_ptr<ArrayBuilder>* out);
 
 }  // namespace arrow
-
-#endif  // ARROW_BUILDER_H_
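A short sketch of the delta-dictionary behavior documented above: after the first Finish, values already seen reuse their existing indices and only new terms enter a delta dictionary. The helper name and sample strings are illustrative, not from this change.

#include <memory>

#include "arrow/builder.h"

// Sketch only: two Finish calls on the same StringDictionaryBuilder; the
// second array's dictionary is a delta containing only the new term.
arrow::Status BuildDeltas(std::shared_ptr<arrow::Array>* first,
                          std::shared_ptr<arrow::Array>* second) {
  arrow::StringDictionaryBuilder builder(arrow::default_memory_pool());
  ARROW_RETURN_NOT_OK(builder.Append("foo"));
  ARROW_RETURN_NOT_OK(builder.Append("bar"));
  ARROW_RETURN_NOT_OK(builder.Finish(first));   // dictionary: ["foo", "bar"]
  ARROW_RETURN_NOT_OK(builder.Append("foo"));   // reuses index 0
  ARROW_RETURN_NOT_OK(builder.Append("baz"));   // new term -> delta mode
  return builder.Finish(second);                // delta dictionary: ["baz"]
}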
diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index 2f4f5d16364f1..114752934c9f6 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -30,6 +30,7 @@
 #include "arrow/array.h"
 #include "arrow/buffer.h"
+#include "arrow/sparse_tensor.h"
 #include "arrow/status.h"
 #include "arrow/tensor.h"
 #include "arrow/type.h"
@@ -324,7 +325,15 @@ static bool IsEqualPrimitive(const PrimitiveArray& left, const PrimitiveArray& right) {
     right_data = right.values()->data() + right.offset() * byte_width;
   }
 
-  if (left.null_count() > 0) {
+  if (byte_width == 0) {
+    // Special case 0-width data, as the data pointers may be null
+    for (int64_t i = 0; i < left.length(); ++i) {
+      if (left.IsNull(i) != right.IsNull(i)) {
+        return false;
+      }
+    }
+    return true;
+  } else if (left.null_count() > 0) {
     for (int64_t i = 0; i < left.length(); ++i) {
       const bool left_null = left.IsNull(i);
       const bool right_null = right.IsNull(i);
@@ -774,6 +783,98 @@ bool TensorEquals(const Tensor& left, const Tensor& right) {
   return are_equal;
 }
 
+namespace {
+
+template <typename LeftSparseIndexType, typename RightSparseIndexType>
+struct SparseTensorEqualsImpl {
+  static bool Compare(const SparseTensorImpl<LeftSparseIndexType>& left,
+                      const SparseTensorImpl<RightSparseIndexType>& right) {
+    // TODO(mrkn): should we support the equality among different formats?
+    return false;
+  }
+};
+
+template <typename SparseIndexType>
+struct SparseTensorEqualsImpl<SparseIndexType, SparseIndexType> {
+  static bool Compare(const SparseTensorImpl<SparseIndexType>& left,
+                      const SparseTensorImpl<SparseIndexType>& right) {
+    DCHECK(left.type()->id() == right.type()->id());
+    DCHECK(left.shape() == right.shape());
+    DCHECK(left.non_zero_length() == right.non_zero_length());
+
+    const auto& left_index = checked_cast<const SparseIndexType&>(*left.sparse_index());
+    const auto& right_index =
+        checked_cast<const SparseIndexType&>(*right.sparse_index());
+
+    if (!left_index.Equals(right_index)) {
+      return false;
+    }
+
+    const auto& size_meta = dynamic_cast<const FixedWidthType&>(*left.type());
+    const int byte_width = size_meta.bit_width() / CHAR_BIT;
+    DCHECK_GT(byte_width, 0);
+
+    const uint8_t* left_data = left.data()->data();
+    const uint8_t* right_data = right.data()->data();
+
+    return memcmp(left_data, right_data,
+                  static_cast<size_t>(byte_width * left.non_zero_length())) == 0;
+  }
+};
+
+template <typename SparseIndexType>
+inline bool SparseTensorEqualsImplDispatch(const SparseTensorImpl<SparseIndexType>& left,
+                                           const SparseTensor& right) {
+  switch (right.format_id()) {
+    case SparseTensorFormat::COO: {
+      const auto& right_coo =
+          checked_cast<const SparseTensorImpl<SparseCOOIndex>&>(right);
+      return SparseTensorEqualsImpl<SparseIndexType, SparseCOOIndex>::Compare(left,
+                                                                              right_coo);
+    }
+
+    case SparseTensorFormat::CSR: {
+      const auto& right_csr =
+          checked_cast<const SparseTensorImpl<SparseCSRIndex>&>(right);
+      return SparseTensorEqualsImpl<SparseIndexType, SparseCSRIndex>::Compare(left,
+                                                                              right_csr);
+    }
+
+    default:
+      return false;
+  }
+}
+
+}  // namespace
+
+bool SparseTensorEquals(const SparseTensor& left, const SparseTensor& right) {
+  if (&left == &right) {
+    return true;
+  } else if (left.type()->id() != right.type()->id()) {
+    return false;
+  } else if (left.size() == 0) {
+    return true;
+  } else if (left.shape() != right.shape()) {
+    return false;
+  } else if (left.non_zero_length() != right.non_zero_length()) {
+    return false;
+  }
+
+  switch (left.format_id()) {
+    case SparseTensorFormat::COO: {
+      const auto& left_coo = checked_cast<const SparseTensorImpl<SparseCOOIndex>&>(left);
+      return SparseTensorEqualsImplDispatch(left_coo, right);
+    }
+
+    case SparseTensorFormat::CSR: {
+      const auto& left_csr = checked_cast<const SparseTensorImpl<SparseCSRIndex>&>(left);
+      return SparseTensorEqualsImplDispatch(left_csr, right);
+    }
+
+    default:
+      return false;
+  }
+}
+
 bool TypeEquals(const DataType& left, const DataType& right) {
   bool are_equal;
   // The arrays are the same object
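A usage sketch for the new SparseTensorEquals entry point follows. The construction of a SparseCOOTensor from a dense Tensor is assumed here (that constructor lives in sparse_tensor.h and is not shown in this diff), so treat the ctor spelling as hypothetical; the comparison semantics mirror the checks in the implementation above.

#include <memory>
#include <vector>

#include "arrow/buffer.h"
#include "arrow/compare.h"
#include "arrow/sparse_tensor.h"
#include "arrow/tensor.h"
#include "arrow/type.h"

// Sketch only: equal type, shape, sparse index, and non-zero data => true;
// differing formats (e.g. COO vs CSR) compare unequal.
bool CompareSparse() {
  std::vector<int64_t> shape = {2, 3};
  std::vector<int64_t> values = {0, 1, 0, 0, 2, 0};
  auto data = arrow::Buffer::Wrap(values.data(), values.size());
  arrow::Tensor dense(arrow::int64(), data, shape);
  arrow::SparseCOOTensor left(dense);   // hypothetical conversion ctor
  arrow::SparseCOOTensor right(dense);  // same non-zero values and index
  return arrow::SparseTensorEquals(left, right);
}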
diff --git a/cpp/src/arrow/compare.h b/cpp/src/arrow/compare.h
index 21e2fdc24f19c..d49d7cc0fdb08 100644
--- a/cpp/src/arrow/compare.h
+++ b/cpp/src/arrow/compare.h
@@ -29,12 +29,16 @@ namespace arrow {
 class Array;
 class DataType;
 class Tensor;
+class SparseTensor;
 
 /// Returns true if the arrays are exactly equal
 bool ARROW_EXPORT ArrayEquals(const Array& left, const Array& right);
 
 bool ARROW_EXPORT TensorEquals(const Tensor& left, const Tensor& right);
 
+/// EXPERIMENTAL: Returns true if the given sparse tensors are exactly equal
+bool ARROW_EXPORT SparseTensorEquals(const SparseTensor& left, const SparseTensor& right);
+
 /// Returns true if the arrays are approximately equal. For non-floating point
 /// types, this is equivalent to ArrayEquals(left, right)
 bool ARROW_EXPORT ArrayApproxEquals(const Array& left, const Array& right);
diff --git a/cpp/src/arrow/compute/CMakeLists.txt b/cpp/src/arrow/compute/CMakeLists.txt
index d4369ed27b7c4..75d152b0bafa3 100644
--- a/cpp/src/arrow/compute/CMakeLists.txt
+++ b/cpp/src/arrow/compute/CMakeLists.txt
@@ -15,20 +15,10 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# Headers: top level
-install(FILES
-  api.h
-  context.h
-  kernel.h
-  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow/compute")
+ARROW_INSTALL_ALL_HEADERS("arrow/compute")
 
 # pkg-config support
-configure_file(arrow-compute.pc.in
-  "${CMAKE_CURRENT_BINARY_DIR}/arrow-compute.pc"
-  @ONLY)
-install(
-  FILES "${CMAKE_CURRENT_BINARY_DIR}/arrow-compute.pc"
-  DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig/")
+ARROW_ADD_PKG_CONFIG("arrow-compute")
 
 #######################################
 # Unit tests
diff --git a/cpp/src/arrow/compute/compute-test.cc b/cpp/src/arrow/compute/compute-test.cc
index 821569e3f524c..f850a296976a5 100644
--- a/cpp/src/arrow/compute/compute-test.cc
+++ b/cpp/src/arrow/compute/compute-test.cc
@@ -39,10 +39,8 @@
 #include "arrow/compute/context.h"
 #include "arrow/compute/kernel.h"
-#include "arrow/compute/kernels/boolean.h"
-#include "arrow/compute/kernels/cast.h"
-#include "arrow/compute/kernels/hash.h"
 #include "arrow/compute/kernels/util-internal.h"
+#include "arrow/compute/test-util.h"
 
 using std::shared_ptr;
 using std::vector;
@@ -50,1531 +48,31 @@ using std::vector;
 namespace arrow {
 namespace compute {
 
-class ComputeFixture {
- public:
-  ComputeFixture() : ctx_(default_memory_pool()) {}
-
- protected:
-  FunctionContext ctx_;
-};
-
-template <typename Type, typename T>
-shared_ptr<Array> _MakeArray(const shared_ptr<DataType>& type, const vector<T>& values,
-                             const vector<bool>& is_valid) {
-  shared_ptr<Array> result;
-  if (is_valid.size() > 0) {
-    ArrayFromVector<Type, T>(type, is_valid, values, &result);
-  } else {
-    ArrayFromVector<Type, T>(type, values, &result);
-  }
-  return result;
-}
-
-// ----------------------------------------------------------------------
-// Cast
-
-static void AssertBufferSame(const Array& left, const Array& right, int buffer_index) {
-  ASSERT_EQ(left.data()->buffers[buffer_index].get(),
-            right.data()->buffers[buffer_index].get());
-}
-
-class TestCast : public ComputeFixture, public TestBase {
- public:
-  void CheckPass(const Array& input, const Array& expected,
-                 const shared_ptr<DataType>& out_type, const CastOptions& options) {
-    shared_ptr<Array> result;
-    ASSERT_OK(Cast(&ctx_, input, out_type, options, &result));
-    ASSERT_ARRAYS_EQUAL(expected, *result);
-  }
-
-  template <typename InType, typename I_TYPE>
-  void CheckFails(const shared_ptr<DataType>& in_type, const vector<I_TYPE>& in_values,
-                  const vector<bool>& is_valid, const shared_ptr<DataType>& out_type,
-                  const CastOptions& options) {
-    shared_ptr<Array> input, result;
-    if (is_valid.size() > 0) {
-      ArrayFromVector<InType, I_TYPE>(in_type, is_valid, in_values, &input);
-    } else {
-      ArrayFromVector<InType, I_TYPE>(in_type, in_values, &input);
-    }
-    ASSERT_RAISES(Invalid, Cast(&ctx_, *input, out_type, options, &result));
-  }
-
-  void CheckZeroCopy(const Array& input, const shared_ptr<DataType>& out_type) {
-    shared_ptr<Array> result;
-    ASSERT_OK(Cast(&ctx_, input, out_type, {}, &result));
-    AssertBufferSame(input, *result, 0);
-    AssertBufferSame(input, *result, 1);
-  }
-
-  template <typename InType, typename I_TYPE, typename OutType, typename O_TYPE>
-  void CheckCase(const shared_ptr<DataType>& in_type, const vector<I_TYPE>& in_values,
-                 const vector<bool>& is_valid, const
shared_ptr& out_type, - const vector& out_values, const CastOptions& options) { - shared_ptr input, expected; - if (is_valid.size() > 0) { - ArrayFromVector(in_type, is_valid, in_values, &input); - ArrayFromVector(out_type, is_valid, out_values, &expected); - } else { - ArrayFromVector(in_type, in_values, &input); - ArrayFromVector(out_type, out_values, &expected); - } - CheckPass(*input, *expected, out_type, options); - - // Check a sliced variant - if (input->length() > 1) { - CheckPass(*input->Slice(1), *expected->Slice(1), out_type, options); - } - } -}; - -TEST_F(TestCast, SameTypeZeroCopy) { - vector is_valid = {true, false, true, true, true}; - vector v1 = {0, 1, 2, 3, 4}; - - shared_ptr arr; - ArrayFromVector(int32(), is_valid, v1, &arr); - - shared_ptr result; - ASSERT_OK(Cast(&this->ctx_, *arr, int32(), {}, &result)); - - AssertBufferSame(*arr, *result, 0); - AssertBufferSame(*arr, *result, 1); -} - -TEST_F(TestCast, ToBoolean) { - CastOptions options; - - vector is_valid = {true, false, true, true, true}; - - // int8, should suffice for other integers - vector v1 = {0, 1, 127, -1, 0}; - vector e1 = {false, true, true, true, false}; - CheckCase(int8(), v1, is_valid, boolean(), e1, - options); - - // floating point - vector v2 = {1.0, 0, 0, -1.0, 5.0}; - vector e2 = {true, false, false, true, true}; - CheckCase(float64(), v2, is_valid, boolean(), e2, - options); -} - -TEST_F(TestCast, ToIntUpcast) { - CastOptions options; - options.allow_int_overflow = false; - - vector is_valid = {true, false, true, true, true}; - - // int8 to int32 - vector v1 = {0, 1, 127, -1, 0}; - vector e1 = {0, 1, 127, -1, 0}; - CheckCase(int8(), v1, is_valid, int32(), e1, - options); - - // bool to int8 - vector v2 = {false, true, false, true, true}; - vector e2 = {0, 1, 0, 1, 1}; - CheckCase(boolean(), v2, is_valid, int8(), e2, - options); - - // uint8 to int16, no overflow/underrun - vector v3 = {0, 100, 200, 255, 0}; - vector e3 = {0, 100, 200, 255, 0}; - CheckCase(uint8(), v3, is_valid, int16(), e3, - options); -} - -TEST_F(TestCast, OverflowInNullSlot) { - CastOptions options; - options.allow_int_overflow = false; - - vector is_valid = {true, false, true, true, true}; - - vector v11 = {0, 70000, 2000, 1000, 0}; - vector e11 = {0, 0, 2000, 1000, 0}; - - shared_ptr expected; - ArrayFromVector(int16(), is_valid, e11, &expected); - - auto buf = Buffer::Wrap(v11.data(), v11.size()); - Int32Array tmp11(5, buf, expected->null_bitmap(), -1); - - CheckPass(tmp11, *expected, int16(), options); -} - -TEST_F(TestCast, ToIntDowncastSafe) { - CastOptions options; - options.allow_int_overflow = false; - - vector is_valid = {true, false, true, true, true}; - - // int16 to uint8, no overflow/underrun - vector v1 = {0, 100, 200, 1, 2}; - vector e1 = {0, 100, 200, 1, 2}; - CheckCase(int16(), v1, is_valid, uint8(), e1, - options); - - // int16 to uint8, with overflow - vector v2 = {0, 100, 256, 0, 0}; - CheckFails(int16(), v2, is_valid, uint8(), options); - - // underflow - vector v3 = {0, 100, -1, 0, 0}; - CheckFails(int16(), v3, is_valid, uint8(), options); - - // int32 to int16, no overflow - vector v4 = {0, 1000, 2000, 1, 2}; - vector e4 = {0, 1000, 2000, 1, 2}; - CheckCase(int32(), v4, is_valid, int16(), e4, - options); - - // int32 to int16, overflow - vector v5 = {0, 1000, 2000, 70000, 0}; - CheckFails(int32(), v5, is_valid, int16(), options); - - // underflow - vector v6 = {0, 1000, 2000, -70000, 0}; - CheckFails(int32(), v6, is_valid, int16(), options); - - vector v7 = {0, 1000, 2000, -70000, 0}; - 
CheckFails(int32(), v7, is_valid, uint8(), options); -} - -template -std::vector UnsafeVectorCast(const std::vector& v) { - size_t n_elems = v.size(); - std::vector result(n_elems); - - for (size_t i = 0; i < v.size(); i++) result[i] = static_cast(v[i]); - - return std::move(result); -} - -TEST_F(TestCast, IntegerSignedToUnsigned) { - CastOptions options; - options.allow_int_overflow = false; - - vector is_valid = {true, false, true, true, true}; - - vector v1 = {INT32_MIN, 100, -1, UINT16_MAX, INT32_MAX}; - - // Same width - CheckFails(int32(), v1, is_valid, uint32(), options); - // Wider - CheckFails(int32(), v1, is_valid, uint64(), options); - // Narrower - CheckFails(int32(), v1, is_valid, uint16(), options); - // Fail because of overflow (instead of underflow). - vector over = {0, -11, 0, UINT16_MAX + 1, INT32_MAX}; - CheckFails(int32(), over, is_valid, uint16(), options); - - options.allow_int_overflow = true; - - CheckCase( - int32(), v1, is_valid, uint32(), UnsafeVectorCast(v1), options); - CheckCase( - int32(), v1, is_valid, uint64(), UnsafeVectorCast(v1), options); - CheckCase( - int32(), v1, is_valid, uint16(), UnsafeVectorCast(v1), options); - CheckCase( - int32(), over, is_valid, uint16(), UnsafeVectorCast(over), - options); -} - -TEST_F(TestCast, IntegerUnsignedToSigned) { - CastOptions options; - options.allow_int_overflow = false; - - vector is_valid = {true, true, true}; - - vector v1 = {0, INT16_MAX + 1, UINT32_MAX}; - vector v2 = {0, INT16_MAX + 1, 2}; - // Same width - CheckFails(uint32(), v1, is_valid, int32(), options); - // Narrower - CheckFails(uint32(), v1, is_valid, int16(), options); - CheckFails(uint32(), v2, is_valid, int16(), options); - - options.allow_int_overflow = true; - - CheckCase( - uint32(), v1, is_valid, int32(), UnsafeVectorCast(v1), options); - CheckCase( - uint32(), v1, is_valid, int64(), UnsafeVectorCast(v1), options); - CheckCase( - uint32(), v1, is_valid, int16(), UnsafeVectorCast(v1), options); - CheckCase( - uint32(), v2, is_valid, int16(), UnsafeVectorCast(v2), options); -} - -TEST_F(TestCast, ToIntDowncastUnsafe) { - CastOptions options; - options.allow_int_overflow = true; - - vector is_valid = {true, false, true, true, true}; - - // int16 to uint8, no overflow/underrun - vector v1 = {0, 100, 200, 1, 2}; - vector e1 = {0, 100, 200, 1, 2}; - CheckCase(int16(), v1, is_valid, uint8(), e1, - options); - - // int16 to uint8, with overflow - vector v2 = {0, 100, 256, 0, 0}; - vector e2 = {0, 100, 0, 0, 0}; - CheckCase(int16(), v2, is_valid, uint8(), e2, - options); - - // underflow - vector v3 = {0, 100, -1, 0, 0}; - vector e3 = {0, 100, 255, 0, 0}; - CheckCase(int16(), v3, is_valid, uint8(), e3, - options); - - // int32 to int16, no overflow - vector v4 = {0, 1000, 2000, 1, 2}; - vector e4 = {0, 1000, 2000, 1, 2}; - CheckCase(int32(), v4, is_valid, int16(), e4, - options); - - // int32 to int16, overflow - // TODO(wesm): do we want to allow this? we could set to null - vector v5 = {0, 1000, 2000, 70000, 0}; - vector e5 = {0, 1000, 2000, 4464, 0}; - CheckCase(int32(), v5, is_valid, int16(), e5, - options); - - // underflow - // TODO(wesm): do we want to allow this? 
we could set overflow to null - vector v6 = {0, 1000, 2000, -70000, 0}; - vector e6 = {0, 1000, 2000, -4464, 0}; - CheckCase(int32(), v6, is_valid, int16(), e6, - options); -} - -TEST_F(TestCast, FloatingPointToInt) { - // which means allow_float_truncate == false - auto options = CastOptions::Safe(); - - vector is_valid = {true, false, true, true, true}; - vector all_valid = {true, true, true, true, true}; - - // float32 to int32 no truncation - vector v1 = {1.0, 0, 0.0, -1.0, 5.0}; - vector e1 = {1, 0, 0, -1, 5}; - CheckCase(float32(), v1, is_valid, int32(), e1, - options); - CheckCase(float32(), v1, all_valid, int32(), e1, - options); - - // float64 to int32 no truncation - vector v2 = {1.0, 0, 0.0, -1.0, 5.0}; - vector e2 = {1, 0, 0, -1, 5}; - CheckCase(float64(), v2, is_valid, int32(), e2, - options); - CheckCase(float64(), v2, all_valid, int32(), e2, - options); - - // float64 to int64 no truncation - vector v3 = {1.0, 0, 0.0, -1.0, 5.0}; - vector e3 = {1, 0, 0, -1, 5}; - CheckCase(float64(), v3, is_valid, int64(), e3, - options); - CheckCase(float64(), v3, all_valid, int64(), e3, - options); - - // float64 to int32 truncate - vector v4 = {1.5, 0, 0.5, -1.5, 5.5}; - vector e4 = {1, 0, 0, -1, 5}; - - options.allow_float_truncate = false; - CheckFails(float64(), v4, is_valid, int32(), options); - CheckFails(float64(), v4, all_valid, int32(), options); - - options.allow_float_truncate = true; - CheckCase(float64(), v4, is_valid, int32(), e4, - options); - CheckCase(float64(), v4, all_valid, int32(), e4, - options); - - // float64 to int64 truncate - vector v5 = {1.5, 0, 0.5, -1.5, 5.5}; - vector e5 = {1, 0, 0, -1, 5}; - - options.allow_float_truncate = false; - CheckFails(float64(), v5, is_valid, int64(), options); - CheckFails(float64(), v5, all_valid, int64(), options); - - options.allow_float_truncate = true; - CheckCase(float64(), v5, is_valid, int64(), e5, - options); - CheckCase(float64(), v5, all_valid, int64(), e5, - options); -} - -TEST_F(TestCast, IntToFloatingPoint) { - auto options = CastOptions::Safe(); - - vector all_valid = {true, true, true, true, true}; - vector all_invalid = {false, false, false, false, false}; - - vector v1 = {INT64_MIN, INT64_MIN + 1, 0, INT64_MAX - 1, INT64_MAX}; - CheckFails(int64(), v1, all_valid, float32(), options); - - // While it's not safe to convert, all values are null. 
- CheckCase(int64(), v1, all_invalid, float64(), - UnsafeVectorCast(v1), - options); -} - -TEST_F(TestCast, TimestampToTimestamp) { - CastOptions options; - - auto CheckTimestampCast = - [this](const CastOptions& options, TimeUnit::type from_unit, TimeUnit::type to_unit, - const vector& from_values, const vector& to_values, - const vector& is_valid) { - CheckCase( - timestamp(from_unit), from_values, is_valid, timestamp(to_unit), to_values, - options); - }; - - vector is_valid = {true, false, true, true, true}; - - // Multiply promotions - vector v1 = {0, 100, 200, 1, 2}; - vector e1 = {0, 100000, 200000, 1000, 2000}; - CheckTimestampCast(options, TimeUnit::SECOND, TimeUnit::MILLI, v1, e1, is_valid); - - vector v2 = {0, 100, 200, 1, 2}; - vector e2 = {0, 100000000L, 200000000L, 1000000, 2000000}; - CheckTimestampCast(options, TimeUnit::SECOND, TimeUnit::MICRO, v2, e2, is_valid); - - vector v3 = {0, 100, 200, 1, 2}; - vector e3 = {0, 100000000000L, 200000000000L, 1000000000L, 2000000000L}; - CheckTimestampCast(options, TimeUnit::SECOND, TimeUnit::NANO, v3, e3, is_valid); - - vector v4 = {0, 100, 200, 1, 2}; - vector e4 = {0, 100000, 200000, 1000, 2000}; - CheckTimestampCast(options, TimeUnit::MILLI, TimeUnit::MICRO, v4, e4, is_valid); - - vector v5 = {0, 100, 200, 1, 2}; - vector e5 = {0, 100000000L, 200000000L, 1000000, 2000000}; - CheckTimestampCast(options, TimeUnit::MILLI, TimeUnit::NANO, v5, e5, is_valid); - - vector v6 = {0, 100, 200, 1, 2}; - vector e6 = {0, 100000, 200000, 1000, 2000}; - CheckTimestampCast(options, TimeUnit::MICRO, TimeUnit::NANO, v6, e6, is_valid); - - // Zero copy - vector v7 = {0, 70000, 2000, 1000, 0}; - shared_ptr arr; - ArrayFromVector(timestamp(TimeUnit::SECOND), is_valid, v7, - &arr); - CheckZeroCopy(*arr, timestamp(TimeUnit::SECOND)); - - // ARROW-1773, cast to integer - CheckZeroCopy(*arr, int64()); - - // Divide, truncate - vector v8 = {0, 100123, 200456, 1123, 2456}; - vector e8 = {0, 100, 200, 1, 2}; - - options.allow_time_truncate = true; - CheckTimestampCast(options, TimeUnit::MILLI, TimeUnit::SECOND, v8, e8, is_valid); - CheckTimestampCast(options, TimeUnit::MICRO, TimeUnit::MILLI, v8, e8, is_valid); - CheckTimestampCast(options, TimeUnit::NANO, TimeUnit::MICRO, v8, e8, is_valid); - - vector v9 = {0, 100123000, 200456000, 1123000, 2456000}; - vector e9 = {0, 100, 200, 1, 2}; - CheckTimestampCast(options, TimeUnit::MICRO, TimeUnit::SECOND, v9, e9, is_valid); - CheckTimestampCast(options, TimeUnit::NANO, TimeUnit::MILLI, v9, e9, is_valid); - - vector v10 = {0, 100123000000L, 200456000000L, 1123000000L, 2456000000}; - vector e10 = {0, 100, 200, 1, 2}; - CheckTimestampCast(options, TimeUnit::NANO, TimeUnit::SECOND, v10, e10, is_valid); - - // Disallow truncate, failures - options.allow_time_truncate = false; - CheckFails(timestamp(TimeUnit::MILLI), v8, is_valid, - timestamp(TimeUnit::SECOND), options); - CheckFails(timestamp(TimeUnit::MICRO), v8, is_valid, - timestamp(TimeUnit::MILLI), options); - CheckFails(timestamp(TimeUnit::NANO), v8, is_valid, - timestamp(TimeUnit::MICRO), options); - CheckFails(timestamp(TimeUnit::MICRO), v9, is_valid, - timestamp(TimeUnit::SECOND), options); - CheckFails(timestamp(TimeUnit::NANO), v9, is_valid, - timestamp(TimeUnit::MILLI), options); - CheckFails(timestamp(TimeUnit::NANO), v10, is_valid, - timestamp(TimeUnit::SECOND), options); -} - -TEST_F(TestCast, TimestampToDate32_Date64) { - CastOptions options; - - vector is_valid = {true, true, false}; - - // 2000-01-01, 2000-01-02, null - vector v_nano = 
{946684800000000000, 946771200000000000, 0}; - vector v_micro = {946684800000000, 946771200000000, 0}; - vector v_milli = {946684800000, 946771200000, 0}; - vector v_second = {946684800, 946771200, 0}; - vector v_day = {10957, 10958, 0}; - - // Simple conversions - CheckCase( - timestamp(TimeUnit::NANO), v_nano, is_valid, date64(), v_milli, options); - CheckCase( - timestamp(TimeUnit::MICRO), v_micro, is_valid, date64(), v_milli, options); - CheckCase( - timestamp(TimeUnit::MILLI), v_milli, is_valid, date64(), v_milli, options); - CheckCase( - timestamp(TimeUnit::SECOND), v_second, is_valid, date64(), v_milli, options); - - CheckCase( - timestamp(TimeUnit::NANO), v_nano, is_valid, date32(), v_day, options); - CheckCase( - timestamp(TimeUnit::MICRO), v_micro, is_valid, date32(), v_day, options); - CheckCase( - timestamp(TimeUnit::MILLI), v_milli, is_valid, date32(), v_day, options); - CheckCase( - timestamp(TimeUnit::SECOND), v_second, is_valid, date32(), v_day, options); - - // Disallow truncate, failures - vector v_nano_fail = {946684800000000001, 946771200000000001, 0}; - vector v_micro_fail = {946684800000001, 946771200000001, 0}; - vector v_milli_fail = {946684800001, 946771200001, 0}; - vector v_second_fail = {946684801, 946771201, 0}; - - options.allow_time_truncate = false; - CheckFails(timestamp(TimeUnit::NANO), v_nano_fail, is_valid, date64(), - options); - CheckFails(timestamp(TimeUnit::MICRO), v_micro_fail, is_valid, date64(), - options); - CheckFails(timestamp(TimeUnit::MILLI), v_milli_fail, is_valid, date64(), - options); - CheckFails(timestamp(TimeUnit::SECOND), v_second_fail, is_valid, - date64(), options); - - CheckFails(timestamp(TimeUnit::NANO), v_nano_fail, is_valid, date32(), - options); - CheckFails(timestamp(TimeUnit::MICRO), v_micro_fail, is_valid, date32(), - options); - CheckFails(timestamp(TimeUnit::MILLI), v_milli_fail, is_valid, date32(), - options); - CheckFails(timestamp(TimeUnit::SECOND), v_second_fail, is_valid, - date32(), options); - - // Make sure that nulls are excluded from the truncation checks - vector v_second_nofail = {946684800, 946771200, 1}; - CheckCase( - timestamp(TimeUnit::SECOND), v_second_nofail, is_valid, date64(), v_milli, options); - CheckCase( - timestamp(TimeUnit::SECOND), v_second_nofail, is_valid, date32(), v_day, options); -} - -TEST_F(TestCast, TimeToCompatible) { - CastOptions options; - - vector is_valid = {true, false, true, true, true}; - - // Multiply promotions - vector v1 = {0, 100, 200, 1, 2}; - vector e1 = {0, 100000, 200000, 1000, 2000}; - CheckCase( - time32(TimeUnit::SECOND), v1, is_valid, time32(TimeUnit::MILLI), e1, options); - - vector v2 = {0, 100, 200, 1, 2}; - vector e2 = {0, 100000000L, 200000000L, 1000000, 2000000}; - CheckCase( - time32(TimeUnit::SECOND), v2, is_valid, time64(TimeUnit::MICRO), e2, options); - - vector v3 = {0, 100, 200, 1, 2}; - vector e3 = {0, 100000000000L, 200000000000L, 1000000000L, 2000000000L}; - CheckCase( - time32(TimeUnit::SECOND), v3, is_valid, time64(TimeUnit::NANO), e3, options); - - vector v4 = {0, 100, 200, 1, 2}; - vector e4 = {0, 100000, 200000, 1000, 2000}; - CheckCase( - time32(TimeUnit::MILLI), v4, is_valid, time64(TimeUnit::MICRO), e4, options); - - vector v5 = {0, 100, 200, 1, 2}; - vector e5 = {0, 100000000L, 200000000L, 1000000, 2000000}; - CheckCase( - time32(TimeUnit::MILLI), v5, is_valid, time64(TimeUnit::NANO), e5, options); - - vector v6 = {0, 100, 200, 1, 2}; - vector e6 = {0, 100000, 200000, 1000, 2000}; - CheckCase( - time64(TimeUnit::MICRO), v6, is_valid, 
time64(TimeUnit::NANO), e6, options); - - // Zero copy - vector v7 = {0, 70000, 2000, 1000, 0}; - shared_ptr arr; - ArrayFromVector(time64(TimeUnit::MICRO), is_valid, v7, &arr); - CheckZeroCopy(*arr, time64(TimeUnit::MICRO)); - - // ARROW-1773: cast to int64 - CheckZeroCopy(*arr, int64()); - - vector v7_2 = {0, 70000, 2000, 1000, 0}; - ArrayFromVector(time32(TimeUnit::SECOND), is_valid, v7_2, &arr); - CheckZeroCopy(*arr, time32(TimeUnit::SECOND)); - - // ARROW-1773: cast to int64 - CheckZeroCopy(*arr, int32()); - - // Divide, truncate - vector v8 = {0, 100123, 200456, 1123, 2456}; - vector e8 = {0, 100, 200, 1, 2}; - - options.allow_time_truncate = true; - CheckCase( - time32(TimeUnit::MILLI), v8, is_valid, time32(TimeUnit::SECOND), e8, options); - CheckCase( - time64(TimeUnit::MICRO), v8, is_valid, time32(TimeUnit::MILLI), e8, options); - CheckCase( - time64(TimeUnit::NANO), v8, is_valid, time64(TimeUnit::MICRO), e8, options); - - vector v9 = {0, 100123000, 200456000, 1123000, 2456000}; - vector e9 = {0, 100, 200, 1, 2}; - CheckCase( - time64(TimeUnit::MICRO), v9, is_valid, time32(TimeUnit::SECOND), e9, options); - CheckCase( - time64(TimeUnit::NANO), v9, is_valid, time32(TimeUnit::MILLI), e9, options); - - vector v10 = {0, 100123000000L, 200456000000L, 1123000000L, 2456000000}; - vector e10 = {0, 100, 200, 1, 2}; - CheckCase( - time64(TimeUnit::NANO), v10, is_valid, time32(TimeUnit::SECOND), e10, options); - - // Disallow truncate, failures - - options.allow_time_truncate = false; - CheckFails(time32(TimeUnit::MILLI), v8, is_valid, time32(TimeUnit::SECOND), - options); - CheckFails(time64(TimeUnit::MICRO), v8, is_valid, time32(TimeUnit::MILLI), - options); - CheckFails(time64(TimeUnit::NANO), v8, is_valid, time64(TimeUnit::MICRO), - options); - CheckFails(time64(TimeUnit::MICRO), v9, is_valid, time32(TimeUnit::SECOND), - options); - CheckFails(time64(TimeUnit::NANO), v9, is_valid, time32(TimeUnit::MILLI), - options); - CheckFails(time64(TimeUnit::NANO), v10, is_valid, time32(TimeUnit::SECOND), - options); -} - -TEST_F(TestCast, PrimitiveZeroCopy) { - shared_ptr arr; - - ArrayFromVector(uint8(), {1, 1, 1, 1}, {1, 2, 3, 4}, &arr); - CheckZeroCopy(*arr, uint8()); - ArrayFromVector(int8(), {1, 1, 1, 1}, {1, 2, 3, 4}, &arr); - CheckZeroCopy(*arr, int8()); - - ArrayFromVector(uint16(), {1, 1, 1, 1}, {1, 2, 3, 4}, &arr); - CheckZeroCopy(*arr, uint16()); - ArrayFromVector(int16(), {1, 1, 1, 1}, {1, 2, 3, 4}, &arr); - CheckZeroCopy(*arr, int16()); - - ArrayFromVector(uint32(), {1, 1, 1, 1}, {1, 2, 3, 4}, &arr); - CheckZeroCopy(*arr, uint32()); - ArrayFromVector(int32(), {1, 1, 1, 1}, {1, 2, 3, 4}, &arr); - CheckZeroCopy(*arr, int32()); - - ArrayFromVector(uint64(), {1, 1, 1, 1}, {1, 2, 3, 4}, &arr); - CheckZeroCopy(*arr, uint64()); - ArrayFromVector(int64(), {1, 1, 1, 1}, {1, 2, 3, 4}, &arr); - CheckZeroCopy(*arr, int64()); - - ArrayFromVector(float32(), {1, 1, 1, 1}, {1, 2, 3, 4}, &arr); - CheckZeroCopy(*arr, float32()); - - ArrayFromVector(float64(), {1, 1, 1, 1}, {1, 2, 3, 4}, &arr); - CheckZeroCopy(*arr, float64()); -} - -TEST_F(TestCast, DateToCompatible) { - CastOptions options; - - vector is_valid = {true, false, true, true, true}; - - constexpr int64_t F = 86400000; - - // Multiply promotion - vector v1 = {0, 100, 200, 1, 2}; - vector e1 = {0, 100 * F, 200 * F, F, 2 * F}; - CheckCase(date32(), v1, is_valid, date64(), - e1, options); - - // Zero copy - vector v2 = {0, 70000, 2000, 1000, 0}; - vector v3 = {0, 70000, 2000, 1000, 0}; - shared_ptr arr; - ArrayFromVector(date32(), is_valid, v2, 
&arr); - CheckZeroCopy(*arr, date32()); - - // ARROW-1773: zero copy cast to integer - CheckZeroCopy(*arr, int32()); - - ArrayFromVector(date64(), is_valid, v3, &arr); - CheckZeroCopy(*arr, date64()); - - // ARROW-1773: zero copy cast to integer - CheckZeroCopy(*arr, int64()); - - // Divide, truncate - vector v8 = {0, 100 * F + 123, 200 * F + 456, F + 123, 2 * F + 456}; - vector e8 = {0, 100, 200, 1, 2}; - - options.allow_time_truncate = true; - CheckCase(date64(), v8, is_valid, date32(), - e8, options); - - // Disallow truncate, failures - options.allow_time_truncate = false; - CheckFails(date64(), v8, is_valid, date32(), options); -} - -TEST_F(TestCast, ToDouble) { - CastOptions options; - vector is_valid = {true, false, true, true, true}; - - // int16 to double - vector v1 = {0, 100, 200, 1, 2}; - vector e1 = {0, 100, 200, 1, 2}; - CheckCase(int16(), v1, is_valid, float64(), e1, - options); - - // float to double - vector v2 = {0, 100, 200, 1, 2}; - vector e2 = {0, 100, 200, 1, 2}; - CheckCase(float32(), v2, is_valid, float64(), e2, - options); - - // bool to double - vector v3 = {true, true, false, false, true}; - vector e3 = {1, 1, 0, 0, 1}; - CheckCase(boolean(), v3, is_valid, float64(), e3, - options); -} - -TEST_F(TestCast, ChunkedArray) { - vector values1 = {0, 1, 2}; - vector values2 = {3, 4, 5}; - - auto type = int16(); - auto out_type = int64(); - - auto a1 = _MakeArray(type, values1, {}); - auto a2 = _MakeArray(type, values2, {}); - - ArrayVector arrays = {a1, a2}; - auto carr = std::make_shared(arrays); - - CastOptions options; - - Datum out; - ASSERT_OK(Cast(&this->ctx_, Datum(carr), out_type, options, &out)); - ASSERT_EQ(Datum::CHUNKED_ARRAY, out.kind()); - - auto out_carr = out.chunked_array(); - - vector ex_values1 = {0, 1, 2}; - vector ex_values2 = {3, 4, 5}; - auto a3 = _MakeArray(out_type, ex_values1, {}); - auto a4 = _MakeArray(out_type, ex_values2, {}); - - ArrayVector ex_arrays = {a3, a4}; - auto ex_carr = std::make_shared(ex_arrays); - - ASSERT_TRUE(out.chunked_array()->Equals(*ex_carr)); -} - -TEST_F(TestCast, UnsupportedTarget) { - vector is_valid = {true, false, true, true, true}; - vector v1 = {0, 1, 2, 3, 4}; - - shared_ptr arr; - ArrayFromVector(int32(), is_valid, v1, &arr); - - shared_ptr result; - ASSERT_RAISES(NotImplemented, Cast(&this->ctx_, *arr, utf8(), {}, &result)); -} - -TEST_F(TestCast, DateTimeZeroCopy) { - vector is_valid = {true, false, true, true, true}; - - vector v1 = {0, 70000, 2000, 1000, 0}; - shared_ptr arr; - ArrayFromVector(int32(), is_valid, v1, &arr); - - CheckZeroCopy(*arr, time32(TimeUnit::SECOND)); - CheckZeroCopy(*arr, date32()); - - vector v2 = {0, 70000, 2000, 1000, 0}; - ArrayFromVector(int64(), is_valid, v2, &arr); - - CheckZeroCopy(*arr, time64(TimeUnit::MICRO)); - CheckZeroCopy(*arr, date64()); - CheckZeroCopy(*arr, timestamp(TimeUnit::NANO)); -} - -TEST_F(TestCast, FromNull) { - // Null casts to everything - const int length = 10; - - NullArray arr(length); - - shared_ptr result; - ASSERT_OK(Cast(&ctx_, arr, int32(), {}, &result)); - - ASSERT_EQ(length, result->length()); - ASSERT_EQ(length, result->null_count()); - - // OK to look at bitmaps - ASSERT_ARRAYS_EQUAL(*result, *result); -} - -TEST_F(TestCast, PreallocatedMemory) { - CastOptions options; - options.allow_int_overflow = false; - - vector is_valid = {true, false, true, true, true}; - - const int64_t length = 5; - - shared_ptr arr; - vector v1 = {0, 70000, 2000, 1000, 0}; - vector e1 = {0, 70000, 2000, 1000, 0}; - ArrayFromVector(int32(), is_valid, v1, &arr); - - 
auto out_type = int64(); - - std::unique_ptr kernel; - ASSERT_OK(GetCastFunction(*int32(), out_type, options, &kernel)); - - auto out_data = ArrayData::Make(out_type, length); - - shared_ptr out_values; - ASSERT_OK(this->ctx_.Allocate(length * sizeof(int64_t), &out_values)); - - out_data->buffers.push_back(nullptr); - out_data->buffers.push_back(out_values); - - Datum out(out_data); - ASSERT_OK(kernel->Call(&this->ctx_, Datum(arr), &out)); - - // Buffer address unchanged - ASSERT_EQ(out_values.get(), out_data->buffers[1].get()); - - shared_ptr result = MakeArray(out_data); - shared_ptr expected; - ArrayFromVector(int64(), is_valid, e1, &expected); - - ASSERT_ARRAYS_EQUAL(*expected, *result); -} - -template -void CheckOffsetOutputCase(FunctionContext* ctx, const std::shared_ptr& in_type, - const vector& in_values, - const std::shared_ptr& out_type, - const vector& out_values) { - using OutTraits = TypeTraits; - - CastOptions options; - - const int64_t length = static_cast(in_values.size()); - - shared_ptr arr, expected; - ArrayFromVector(in_type, in_values, &arr); - ArrayFromVector(out_type, out_values, &expected); - - shared_ptr out_buffer; - ASSERT_OK(ctx->Allocate(OutTraits::bytes_required(length), &out_buffer)); - - std::unique_ptr kernel; - ASSERT_OK(GetCastFunction(*in_type, out_type, options, &kernel)); - - const int64_t first_half = length / 2; - - auto out_data = ArrayData::Make(out_type, length, {nullptr, out_buffer}); - auto out_second_data = out_data->Copy(); - out_second_data->offset = first_half; - - Datum out_first(out_data); - Datum out_second(out_second_data); - - // Cast each bit - ASSERT_OK(kernel->Call(ctx, Datum(arr->Slice(0, first_half)), &out_first)); - ASSERT_OK(kernel->Call(ctx, Datum(arr->Slice(first_half)), &out_second)); - - shared_ptr result = MakeArray(out_data); - - ASSERT_ARRAYS_EQUAL(*expected, *result); -} - -TEST_F(TestCast, OffsetOutputBuffer) { - // ARROW-1735 - vector v1 = {0, 10000, 2000, 1000, 0}; - vector e1 = {0, 10000, 2000, 1000, 0}; - - auto in_type = int32(); - auto out_type = int64(); - CheckOffsetOutputCase(&this->ctx_, in_type, v1, - out_type, e1); - - vector e2 = {false, true, true, true, false}; - - out_type = boolean(); - CheckOffsetOutputCase(&this->ctx_, in_type, v1, - boolean(), e2); - - vector e3 = {0, 10000, 2000, 1000, 0}; - CheckOffsetOutputCase(&this->ctx_, in_type, v1, - int16(), e3); -} - -TEST_F(TestCast, StringToBoolean) { - CastOptions options; - - vector is_valid = {true, false, true, true, true}; - - vector v1 = {"False", "true", "true", "True", "false"}; - vector v2 = {"0", "1", "1", "1", "0"}; - vector e = {false, true, true, true, false}; - CheckCase(utf8(), v1, is_valid, boolean(), - e, options); - CheckCase(utf8(), v2, is_valid, boolean(), - e, options); -} - -TEST_F(TestCast, StringToBooleanErrors) { - CastOptions options; - - vector is_valid = {true}; - - CheckFails(utf8(), {"false "}, is_valid, boolean(), options); - CheckFails(utf8(), {"T"}, is_valid, boolean(), options); -} - -TEST_F(TestCast, StringToNumber) { - CastOptions options; - - vector is_valid = {true, false, true, true, true}; - - // string to int - vector v_int = {"0", "1", "127", "-1", "0"}; - vector e_int8 = {0, 1, 127, -1, 0}; - vector e_int16 = {0, 1, 127, -1, 0}; - vector e_int32 = {0, 1, 127, -1, 0}; - vector e_int64 = {0, 1, 127, -1, 0}; - CheckCase(utf8(), v_int, is_valid, int8(), - e_int8, options); - CheckCase(utf8(), v_int, is_valid, int16(), - e_int16, options); - CheckCase(utf8(), v_int, is_valid, int32(), - e_int32, options); - 
CheckCase(utf8(), v_int, is_valid, int64(), - e_int64, options); - - v_int = {"2147483647", "0", "-2147483648", "0", "0"}; - e_int32 = {2147483647, 0, -2147483648LL, 0, 0}; - CheckCase(utf8(), v_int, is_valid, int32(), - e_int32, options); - v_int = {"9223372036854775807", "0", "-9223372036854775808", "0", "0"}; - e_int64 = {9223372036854775807LL, 0, (-9223372036854775807LL - 1), 0, 0}; - CheckCase(utf8(), v_int, is_valid, int64(), - e_int64, options); - - // string to uint - vector v_uint = {"0", "1", "127", "255", "0"}; - vector e_uint8 = {0, 1, 127, 255, 0}; - vector e_uint16 = {0, 1, 127, 255, 0}; - vector e_uint32 = {0, 1, 127, 255, 0}; - vector e_uint64 = {0, 1, 127, 255, 0}; - CheckCase(utf8(), v_uint, is_valid, - uint8(), e_uint8, options); - CheckCase(utf8(), v_uint, is_valid, - uint16(), e_uint16, options); - CheckCase(utf8(), v_uint, is_valid, - uint32(), e_uint32, options); - CheckCase(utf8(), v_uint, is_valid, - uint64(), e_uint64, options); - - v_uint = {"4294967295", "0", "0", "0", "0"}; - e_uint32 = {4294967295, 0, 0, 0, 0}; - CheckCase(utf8(), v_uint, is_valid, - uint32(), e_uint32, options); - v_uint = {"18446744073709551615", "0", "0", "0", "0"}; - e_uint64 = {18446744073709551615ULL, 0, 0, 0, 0}; - CheckCase(utf8(), v_uint, is_valid, - uint64(), e_uint64, options); - - // string to float - vector v_float = {"0.1", "1.2", "127.3", "200.4", "0.5"}; - vector e_float = {0.1f, 1.2f, 127.3f, 200.4f, 0.5f}; - vector e_double = {0.1, 1.2, 127.3, 200.4, 0.5}; - CheckCase(utf8(), v_float, is_valid, - float32(), e_float, options); - CheckCase(utf8(), v_float, is_valid, - float64(), e_double, options); - - // Test that casting is locale-independent - auto global_locale = std::locale(); - try { - // French locale uses the comma as decimal point - std::locale::global(std::locale("fr_FR.UTF-8")); - } catch (std::runtime_error&) { - // Locale unavailable, ignore - } - CheckCase(utf8(), v_float, is_valid, - float32(), e_float, options); - CheckCase(utf8(), v_float, is_valid, - float64(), e_double, options); - std::locale::global(global_locale); -} - -TEST_F(TestCast, StringToNumberErrors) { - CastOptions options; - - vector is_valid = {true}; - - CheckFails(utf8(), {"z"}, is_valid, int8(), options); - CheckFails(utf8(), {"12 z"}, is_valid, int8(), options); - CheckFails(utf8(), {"128"}, is_valid, int8(), options); - CheckFails(utf8(), {"-129"}, is_valid, int8(), options); - CheckFails(utf8(), {"0.5"}, is_valid, int8(), options); - - CheckFails(utf8(), {"256"}, is_valid, uint8(), options); - CheckFails(utf8(), {"-1"}, is_valid, uint8(), options); - - CheckFails(utf8(), {"z"}, is_valid, float32(), options); -} - -template -class TestDictionaryCast : public TestCast {}; - -typedef ::testing::Types - TestTypes; - -TYPED_TEST_CASE(TestDictionaryCast, TestTypes); - -TYPED_TEST(TestDictionaryCast, Basic) { - CastOptions options; - shared_ptr plain_array = - TestBase::MakeRandomArray::ArrayType>(10, 2); - - Datum out; - ASSERT_OK(DictionaryEncode(&this->ctx_, Datum(plain_array->data()), &out)); - - this->CheckPass(*MakeArray(out.array()), *plain_array, plain_array->type(), options); -} - -TEST_F(TestCast, DictToNonDictNoNulls) { - vector dict_values = {"foo", "bar", "baz"}; - auto ex_dict = _MakeArray(utf8(), dict_values, {}); - auto dict_type = dictionary(int32(), ex_dict); - - // Explicitly construct with nullptr for the null_bitmap_data - std::vector i1 = {1, 0, 1}; - std::vector i2 = {2, 1, 0, 1}; - auto c1 = std::make_shared>(3, Buffer::Wrap(i1)); - auto c2 = std::make_shared>(4, 
Buffer::Wrap(i2)); - - ArrayVector dict_arrays = {std::make_shared(dict_type, c1), - std::make_shared(dict_type, c2)}; - auto dict_carr = std::make_shared(dict_arrays); - - Datum cast_input(dict_carr); - Datum cast_output; - // Ensure that casting works even when the null_bitmap_data array is a nullptr - ASSERT_OK(Cast(&this->ctx_, cast_input, - static_cast(*dict_type).dictionary()->type(), - CastOptions(), &cast_output)); - ASSERT_EQ(Datum::CHUNKED_ARRAY, cast_output.kind()); - - auto e1 = _MakeArray(utf8(), {"bar", "foo", "bar"}, {}); - auto e2 = _MakeArray(utf8(), {"baz", "bar", "foo", "bar"}, {}); - - auto chunks = cast_output.chunked_array()->chunks(); - ASSERT_EQ(chunks.size(), 2); - ASSERT_ARRAYS_EQUAL(*e1, *chunks[0]); - ASSERT_ARRAYS_EQUAL(*e2, *chunks[1]); -} - -/*TYPED_TEST(TestDictionaryCast, Reverse) { - CastOptions options; - shared_ptr plain_array = - TestBase::MakeRandomArray::ArrayType>(10, 2); - - shared_ptr dict_array; - ASSERT_OK(EncodeArrayToDictionary(*plain_array, this->pool_, &dict_array)); - - this->CheckPass(*plain_array, *dict_array, dict_array->type(), options); -}*/ - -TEST_F(TestCast, ListToList) { - CastOptions options; - std::shared_ptr offsets; - - vector offsets_values = {0, 1, 2, 5, 7, 7, 8, 10}; - std::vector offsets_is_valid = {true, true, true, true, false, true, true, true}; - ArrayFromVector(offsets_is_valid, offsets_values, &offsets); - - shared_ptr int32_plain_array = - TestBase::MakeRandomArray::ArrayType>(10, 2); - std::shared_ptr int32_list_array; - ASSERT_OK( - ListArray::FromArrays(*offsets, *int32_plain_array, pool_, &int32_list_array)); - - std::shared_ptr int64_plain_array; - ASSERT_OK(Cast(&this->ctx_, *int32_plain_array, int64(), options, &int64_plain_array)); - std::shared_ptr int64_list_array; - ASSERT_OK( - ListArray::FromArrays(*offsets, *int64_plain_array, pool_, &int64_list_array)); - - std::shared_ptr float64_plain_array; - ASSERT_OK( - Cast(&this->ctx_, *int32_plain_array, float64(), options, &float64_plain_array)); - std::shared_ptr float64_list_array; - ASSERT_OK( - ListArray::FromArrays(*offsets, *float64_plain_array, pool_, &float64_list_array)); - - CheckPass(*int32_list_array, *int64_list_array, int64_list_array->type(), options); - CheckPass(*int32_list_array, *float64_list_array, float64_list_array->type(), options); - CheckPass(*int64_list_array, *int32_list_array, int32_list_array->type(), options); - CheckPass(*int64_list_array, *float64_list_array, float64_list_array->type(), options); - - options.allow_float_truncate = true; - CheckPass(*float64_list_array, *int32_list_array, int32_list_array->type(), options); - CheckPass(*float64_list_array, *int64_list_array, int64_list_array->type(), options); -} - // ---------------------------------------------------------------------- -// Dictionary tests - -template -void CheckUnique(FunctionContext* ctx, const shared_ptr& type, - const vector& in_values, const vector& in_is_valid, - const vector& out_values, const vector& out_is_valid) { - shared_ptr input = _MakeArray(type, in_values, in_is_valid); - shared_ptr expected = _MakeArray(type, out_values, out_is_valid); +// Datum - shared_ptr result; - ASSERT_OK(Unique(ctx, Datum(input), &result)); - ASSERT_ARRAYS_EQUAL(*expected, *result); +template +void CheckImplicitConstructor(enum Datum::type expected_kind) { + std::shared_ptr value; + Datum datum = value; + ASSERT_EQ(expected_kind, datum.kind()); } -template -void CheckDictEncode(FunctionContext* ctx, const shared_ptr& type, - const vector& in_values, const vector& 
in_is_valid, - const vector& out_values, const vector& out_is_valid, - const vector& out_indices) { - shared_ptr input = _MakeArray(type, in_values, in_is_valid); - shared_ptr ex_dict = _MakeArray(type, out_values, out_is_valid); - shared_ptr ex_indices = - _MakeArray(int32(), out_indices, in_is_valid); - - DictionaryArray expected(dictionary(int32(), ex_dict), ex_indices); +TEST(TestDatum, ImplicitConstructors) { + CheckImplicitConstructor(Datum::ARRAY); - Datum datum_out; - ASSERT_OK(DictionaryEncode(ctx, Datum(input), &datum_out)); - shared_ptr result = MakeArray(datum_out.array()); + // Instantiate from array subclass + CheckImplicitConstructor(Datum::ARRAY); - ASSERT_ARRAYS_EQUAL(expected, *result); -} - -class TestHashKernel : public ComputeFixture, public TestBase {}; - -template -class TestHashKernelPrimitive : public ComputeFixture, public TestBase {}; - -typedef ::testing::Types - PrimitiveDictionaries; - -TYPED_TEST_CASE(TestHashKernelPrimitive, PrimitiveDictionaries); - -TYPED_TEST(TestHashKernelPrimitive, Unique) { - using T = typename TypeParam::c_type; - auto type = TypeTraits::type_singleton(); - CheckUnique(&this->ctx_, type, {2, 1, 2, 1}, {true, false, true, true}, - {2, 1}, {}); - CheckUnique(&this->ctx_, type, {2, 1, 3, 1}, {false, false, true, true}, - {3, 1}, {}); -} - -TYPED_TEST(TestHashKernelPrimitive, DictEncode) { - using T = typename TypeParam::c_type; - auto type = TypeTraits::type_singleton(); - CheckDictEncode(&this->ctx_, type, {2, 1, 2, 1, 2, 3}, - {true, false, true, true, true, true}, {2, 1, 3}, {}, - {0, 0, 0, 1, 0, 2}); -} - -TYPED_TEST(TestHashKernelPrimitive, PrimitiveResizeTable) { - using T = typename TypeParam::c_type; - // Skip this test for (u)int8 - if (sizeof(Scalar) == 1) { - return; - } - - const int64_t kTotalValues = 1000000; - const int64_t kRepeats = 5; - - vector values; - vector uniques; - vector indices; - for (int64_t i = 0; i < kTotalValues * kRepeats; i++) { - const auto val = static_cast(i % kTotalValues); - values.push_back(val); - - if (i < kTotalValues) { - uniques.push_back(val); - } - indices.push_back(static_cast(i % kTotalValues)); - } - - auto type = TypeTraits::type_singleton(); - CheckUnique(&this->ctx_, type, values, {}, uniques, {}); - - CheckDictEncode(&this->ctx_, type, values, {}, uniques, {}, indices); -} - -TEST_F(TestHashKernel, UniqueTimeTimestamp) { - CheckUnique(&this->ctx_, time32(TimeUnit::SECOND), {2, 1, 2, 1}, - {true, false, true, true}, {2, 1}, {}); - - CheckUnique(&this->ctx_, time64(TimeUnit::NANO), {2, 1, 2, 1}, - {true, false, true, true}, {2, 1}, {}); - - CheckUnique(&this->ctx_, timestamp(TimeUnit::NANO), - {2, 1, 2, 1}, {true, false, true, true}, {2, 1}, - {}); -} - -TEST_F(TestHashKernel, UniqueBoolean) { - CheckUnique(&this->ctx_, boolean(), {true, true, false, true}, - {true, false, true, true}, {true, false}, {}); - - CheckUnique(&this->ctx_, boolean(), {false, true, false, true}, - {true, false, true, true}, {false, true}, {}); - - // No nulls - CheckUnique(&this->ctx_, boolean(), {true, true, false, true}, {}, - {true, false}, {}); - - CheckUnique(&this->ctx_, boolean(), {false, true, false, true}, {}, - {false, true}, {}); -} - -TEST_F(TestHashKernel, DictEncodeBoolean) { - CheckDictEncode( - &this->ctx_, boolean(), {true, true, false, true, false}, - {true, false, true, true, true}, {true, false}, {}, {0, 0, 1, 0, 1}); - - CheckDictEncode( - &this->ctx_, boolean(), {false, true, false, true, false}, - {true, false, true, true, true}, {false, true}, {}, {0, 0, 0, 1, 0}); - - // No nulls - 
CheckDictEncode(&this->ctx_, boolean(), - {true, true, false, true, false}, {}, {true, false}, - {}, {0, 0, 1, 0, 1}); - - CheckDictEncode(&this->ctx_, boolean(), - {false, true, false, true, false}, {}, {false, true}, - {}, {0, 1, 0, 1, 0}); -} - -TEST_F(TestHashKernel, UniqueBinary) { - CheckUnique(&this->ctx_, binary(), - {"test", "", "test2", "test"}, - {true, false, true, true}, {"test", "test2"}, {}); - - CheckUnique(&this->ctx_, utf8(), {"test", "", "test2", "test"}, - {true, false, true, true}, {"test", "test2"}, {}); -} - -TEST_F(TestHashKernel, DictEncodeBinary) { - CheckDictEncode( - &this->ctx_, binary(), {"test", "", "test2", "test", "baz"}, - {true, false, true, true, true}, {"test", "test2", "baz"}, {}, {0, 0, 1, 0, 2}); - - CheckDictEncode( - &this->ctx_, utf8(), {"test", "", "test2", "test", "baz"}, - {true, false, true, true, true}, {"test", "test2", "baz"}, {}, {0, 0, 1, 0, 2}); -} - -TEST_F(TestHashKernel, BinaryResizeTable) { - const int32_t kTotalValues = 10000; -#if !defined(ARROW_VALGRIND) - const int32_t kRepeats = 10; -#else - // Mitigate Valgrind's slowness - const int32_t kRepeats = 3; -#endif - - vector values; - vector uniques; - vector indices; - char buf[20] = "test"; - - for (int32_t i = 0; i < kTotalValues * kRepeats; i++) { - int32_t index = i % kTotalValues; - - ASSERT_GE(snprintf(buf + 4, sizeof(buf) - 4, "%d", index), 0); - values.emplace_back(buf); - - if (i < kTotalValues) { - uniques.push_back(values.back()); - } - indices.push_back(index); - } - - CheckUnique(&this->ctx_, binary(), values, {}, uniques, {}); - CheckDictEncode(&this->ctx_, binary(), values, {}, uniques, {}, - indices); - - CheckUnique(&this->ctx_, utf8(), values, {}, uniques, {}); - CheckDictEncode(&this->ctx_, utf8(), values, {}, uniques, {}, - indices); -} - -TEST_F(TestHashKernel, UniqueFixedSizeBinary) { - CheckUnique( - &this->ctx_, fixed_size_binary(5), {"aaaaa", "", "bbbbb", "aaaaa"}, - {true, false, true, true}, {"aaaaa", "bbbbb"}, {}); -} - -TEST_F(TestHashKernel, DictEncodeFixedSizeBinary) { - CheckDictEncode( - &this->ctx_, fixed_size_binary(5), {"bbbbb", "", "bbbbb", "aaaaa", "ccccc"}, - {true, false, true, true, true}, {"bbbbb", "aaaaa", "ccccc"}, {}, {0, 0, 0, 1, 2}); -} - -TEST_F(TestHashKernel, FixedSizeBinaryResizeTable) { - const int32_t kTotalValues = 10000; -#if !defined(ARROW_VALGRIND) - const int32_t kRepeats = 10; -#else - // Mitigate Valgrind's slowness - const int32_t kRepeats = 3; -#endif - - vector values; - vector uniques; - vector indices; - char buf[7] = "test.."; - - for (int32_t i = 0; i < kTotalValues * kRepeats; i++) { - int32_t index = i % kTotalValues; - - buf[4] = static_cast(index / 128); - buf[5] = static_cast(index % 128); - values.emplace_back(buf, 6); - - if (i < kTotalValues) { - uniques.push_back(values.back()); - } - indices.push_back(index); - } - - auto type = fixed_size_binary(6); - CheckUnique(&this->ctx_, type, values, {}, uniques, - {}); - CheckDictEncode(&this->ctx_, type, values, {}, - uniques, {}, indices); -} - -TEST_F(TestHashKernel, UniqueDecimal) { - vector values{12, 12, 11, 12}; - vector expected{12, 11}; - - CheckUnique(&this->ctx_, decimal(2, 0), values, - {true, false, true, true}, expected, {}); -} - -TEST_F(TestHashKernel, DictEncodeDecimal) { - vector values{12, 12, 11, 12, 13}; - vector expected{12, 11, 13}; - - CheckDictEncode(&this->ctx_, decimal(2, 0), values, - {true, false, true, true, true}, expected, - {}, {0, 0, 1, 0, 2}); -} - -TEST_F(TestHashKernel, ChunkedArrayInvoke) { - vector values1 = {"foo", "bar", 
"foo"}; - vector values2 = {"bar", "baz", "quuux", "foo"}; - - auto type = utf8(); - auto a1 = _MakeArray(type, values1, {}); - auto a2 = _MakeArray(type, values2, {}); - - vector dict_values = {"foo", "bar", "baz", "quuux"}; - auto ex_dict = _MakeArray(type, dict_values, {}); - - ArrayVector arrays = {a1, a2}; - auto carr = std::make_shared(arrays); - - // Unique - shared_ptr result; - ASSERT_OK(Unique(&this->ctx_, Datum(carr), &result)); - ASSERT_ARRAYS_EQUAL(*ex_dict, *result); - - // Dictionary encode - auto dict_type = dictionary(int32(), ex_dict); - - auto i1 = _MakeArray(int32(), {0, 1, 0}, {}); - auto i2 = _MakeArray(int32(), {1, 2, 3, 0}, {}); - - ArrayVector dict_arrays = {std::make_shared(dict_type, i1), - std::make_shared(dict_type, i2)}; - auto dict_carr = std::make_shared(dict_arrays); - - Datum encoded_out; - ASSERT_OK(DictionaryEncode(&this->ctx_, Datum(carr), &encoded_out)); - ASSERT_EQ(Datum::CHUNKED_ARRAY, encoded_out.kind()); - - AssertChunkedEqual(*dict_carr, *encoded_out.chunked_array()); -} - -using BinaryKernelFunc = - std::function; - -class TestBooleanKernel : public ComputeFixture, public TestBase { - public: - void TestArrayBinary(const BinaryKernelFunc& kernel, const std::shared_ptr& left, - const std::shared_ptr& right, - const std::shared_ptr& expected) { - Datum result; - ASSERT_OK(kernel(&this->ctx_, Datum(left), Datum(right), &result)); - ASSERT_EQ(Datum::ARRAY, result.kind()); - std::shared_ptr result_array = result.make_array(); - ASSERT_TRUE(result_array->Equals(expected)); - } - - void TestChunkedArrayBinary(const BinaryKernelFunc& kernel, - const std::shared_ptr& left, - const std::shared_ptr& right, - const std::shared_ptr& expected) { - Datum result; - std::shared_ptr result_array; - ASSERT_OK(kernel(&this->ctx_, Datum(left), Datum(right), &result)); - ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind()); - std::shared_ptr result_ca = result.chunked_array(); - ASSERT_TRUE(result_ca->Equals(expected)); - } - - void TestBinaryKernel(const BinaryKernelFunc& kernel, const std::vector& values1, - const std::vector& values2, - const std::vector& values3, - const std::vector& values3_nulls) { - auto type = boolean(); - auto a1 = _MakeArray(type, values1, {}); - auto a2 = _MakeArray(type, values2, {}); - auto a3 = _MakeArray(type, values3, {}); - auto a1_nulls = _MakeArray(type, values1, values1); - auto a2_nulls = _MakeArray(type, values2, values2); - auto a3_nulls = _MakeArray(type, values3, values3_nulls); - - TestArrayBinary(kernel, a1, a2, a3); - TestArrayBinary(kernel, a1_nulls, a2_nulls, a3_nulls); - TestArrayBinary(kernel, a1->Slice(1), a2->Slice(1), a3->Slice(1)); - TestArrayBinary(kernel, a1_nulls->Slice(1), a2_nulls->Slice(1), a3_nulls->Slice(1)); - - // ChunkedArray - std::vector> ca1_arrs = {a1, a1->Slice(1)}; - auto ca1 = std::make_shared(ca1_arrs); - std::vector> ca2_arrs = {a2, a2->Slice(1)}; - auto ca2 = std::make_shared(ca2_arrs); - std::vector> ca3_arrs = {a3, a3->Slice(1)}; - auto ca3 = std::make_shared(ca3_arrs); - TestChunkedArrayBinary(kernel, ca1, ca2, ca3); - - // ChunkedArray with different chunks - std::vector> ca4_arrs = {a1->Slice(0, 1), a1->Slice(1), - a1->Slice(1, 1), a1->Slice(2)}; - auto ca4 = std::make_shared(ca4_arrs); - TestChunkedArrayBinary(kernel, ca4, ca2, ca3); - } -}; - -TEST_F(TestBooleanKernel, Invert) { - vector values1 = {true, false, true}; - vector values2 = {false, true, false}; - - auto type = boolean(); - auto a1 = _MakeArray(type, values1, {}); - auto a2 = _MakeArray(type, values2, {}); - - // Plain array - 
Datum result; - ASSERT_OK(Invert(&this->ctx_, Datum(a1), &result)); - ASSERT_EQ(Datum::ARRAY, result.kind()); - std::shared_ptr result_array = result.make_array(); - ASSERT_TRUE(result_array->Equals(a2)); - - // Array with offset - ASSERT_OK(Invert(&this->ctx_, Datum(a1->Slice(1)), &result)); - ASSERT_EQ(Datum::ARRAY, result.kind()); - result_array = result.make_array(); - ASSERT_TRUE(result_array->Equals(a2->Slice(1))); - - // ChunkedArray - std::vector> ca1_arrs = {a1, a1->Slice(1)}; - auto ca1 = std::make_shared(ca1_arrs); - std::vector> ca2_arrs = {a2, a2->Slice(1)}; - auto ca2 = std::make_shared(ca2_arrs); - ASSERT_OK(Invert(&this->ctx_, Datum(ca1), &result)); - ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind()); - std::shared_ptr result_ca = result.chunked_array(); - ASSERT_TRUE(result_ca->Equals(ca2)); -} - -TEST_F(TestBooleanKernel, And) { - vector values1 = {true, false, true, false, true, true}; - vector values2 = {true, true, false, false, true, false}; - vector values3 = {true, false, false, false, true, false}; - TestBinaryKernel(And, values1, values2, values3, values3); -} - -TEST_F(TestBooleanKernel, Or) { - vector values1 = {true, false, true, false, true, true}; - vector values2 = {true, true, false, false, true, false}; - vector values3 = {true, true, true, false, true, true}; - vector values3_nulls = {true, false, false, false, true, false}; - TestBinaryKernel(Or, values1, values2, values3, values3_nulls); -} - -TEST_F(TestBooleanKernel, Xor) { - vector values1 = {true, false, true, false, true, true}; - vector values2 = {true, true, false, false, true, false}; - vector values3 = {false, true, true, false, false, true}; - vector values3_nulls = {true, false, false, false, true, false}; - TestBinaryKernel(Xor, values1, values2, values3, values3_nulls); + CheckImplicitConstructor(Datum::CHUNKED_ARRAY); + CheckImplicitConstructor(Datum::RECORD_BATCH); + CheckImplicitConstructor(Datum::TABLE); } class TestInvokeBinaryKernel : public ComputeFixture, public TestBase {}; -class DummyBinaryKernel : public BinaryKernel { - Status Call(FunctionContext* ctx, const Datum& left, const Datum& right, - Datum* out) override { - return Status::OK(); - } -}; - TEST_F(TestInvokeBinaryKernel, Exceptions) { - DummyBinaryKernel kernel; + MockBinaryKernel kernel; std::vector outputs; std::shared_ptr
<Table> table;
   vector<bool> values1 = {true, false, true};
@@ -1585,14 +83,14 @@ TEST_F(TestInvokeBinaryKernel, Exceptions) {
   auto a2 = _MakeArray<BooleanType, bool>(type, values2, {});
 
   // Left is not an array-like
-  ASSERT_RAISES(Invalid, detail::InvokeBinaryArrayKernel(
-                             &this->ctx_, &kernel, Datum(table), Datum(a2), &outputs));
+  ASSERT_RAISES(Invalid, detail::InvokeBinaryArrayKernel(&this->ctx_, &kernel, table, a2,
+                                                         &outputs));
 
   // Right is not an array-like
-  ASSERT_RAISES(Invalid, detail::InvokeBinaryArrayKernel(&this->ctx_, &kernel, Datum(a1),
-                                                         Datum(table), &outputs));
+  ASSERT_RAISES(Invalid, detail::InvokeBinaryArrayKernel(&this->ctx_, &kernel, a1, table,
+                                                         &outputs));
 
   // Different sized inputs
-  ASSERT_RAISES(Invalid, detail::InvokeBinaryArrayKernel(&this->ctx_, &kernel, Datum(a1),
-                                                         Datum(a1->Slice(1)), &outputs));
+  ASSERT_RAISES(Invalid, detail::InvokeBinaryArrayKernel(&this->ctx_, &kernel, a1,
+                                                         a1->Slice(1), &outputs));
 }
 
 }  // namespace compute
diff --git a/cpp/src/arrow/compute/kernel.h b/cpp/src/arrow/compute/kernel.h
index 8048fff75bc29..93bec75a026ba 100644
--- a/cpp/src/arrow/compute/kernel.h
+++ b/cpp/src/arrow/compute/kernel.h
@@ -19,6 +19,7 @@
 #define ARROW_COMPUTE_KERNEL_H
 
 #include <memory>
+#include <utility>
 #include <vector>
 
 #include "arrow/array.h"
@@ -60,24 +61,41 @@ struct ARROW_EXPORT Datum {
   /// \brief Empty datum, to be populated elsewhere
   Datum() : value(NULLPTR) {}
 
-  explicit Datum(const std::shared_ptr<Scalar>& value) : value(value) {}
-
-  explicit Datum(const std::shared_ptr<ArrayData>& value) : value(value) {}
-
-  explicit Datum(const std::shared_ptr<Array>& value) : Datum(value->data()) {}
-
-  explicit Datum(const std::shared_ptr<ChunkedArray>& value) : value(value) {}
-
-  explicit Datum(const std::shared_ptr<RecordBatch>& value) : value(value) {}
-
-  explicit Datum(const std::shared_ptr<Table>& value) : value(value) {}
-
-  explicit Datum(const std::vector<Datum>& value) : value(value) {}
+  Datum(const std::shared_ptr<Scalar>& value)  // NOLINT implicit conversion
+      : value(value) {}
+  Datum(const std::shared_ptr<ArrayData>& value)  // NOLINT implicit conversion
+      : value(value) {}
+
+  Datum(const std::shared_ptr<Array>& value)  // NOLINT implicit conversion
+      : Datum(value ? value->data() : NULLPTR) {}
+
+  Datum(const std::shared_ptr<ChunkedArray>& value)  // NOLINT implicit conversion
+      : value(value) {}
+  Datum(const std::shared_ptr<RecordBatch>& value)  // NOLINT implicit conversion
+      : value(value) {}
+  Datum(const std::shared_ptr<Table>& value)  // NOLINT implicit conversion
+      : value(value) {}
+  Datum(const std::vector<Datum>& value)  // NOLINT implicit conversion
+      : value(value) {}
+
+  // Cast from subtypes of Array to Datum
+  template <typename T,
+            typename = typename std::enable_if<std::is_base_of<Array, T>::value>::type>
+  Datum(const std::shared_ptr<T>& value)  // NOLINT implicit conversion
+      : Datum(std::shared_ptr<Array>(value)) {}
 
   ~Datum() {}
 
   Datum(const Datum& other) noexcept { this->value = other.value; }
 
+  // Define move constructor and move assignment, for better performance
+  Datum(Datum&& other) noexcept : value(std::move(other.value)) {}
+
+  Datum& operator=(Datum&& other) noexcept {
+    value = std::move(other.value);
+    return *this;
+  }
+
   Datum::type kind() const {
     switch (this->value.which()) {
       case 0:
@@ -133,9 +151,24 @@ struct ARROW_EXPORT Datum {
 };
 
 /// \class UnaryKernel
-/// \brief An array-valued function of a single input argument
+/// \brief A function of a single input argument.
+///
+/// Note to implementors: Try to avoid making kernels that allocate memory if
+/// the output size is a deterministic function of the input Datum's metadata.
+/// Instead, separate the kernel logic and the necessary allocations into two
+/// different kernels. Some reusable kernels that allocate buffers and
+/// delegate computation to another kernel are available in util-internal.h.
 class ARROW_EXPORT UnaryKernel : public OpKernel {
  public:
+  /// \brief Executes the kernel.
+  ///
+  /// \param[in] ctx The function context for the kernel
+  /// \param[in] input The kernel input data
+  /// \param[out] out The output of the function. Each implementation of this
+  /// function might assume different things about the existing contents of out
+  /// (e.g. which buffers are preallocated). In the future it is expected that
+  /// there will be a more generic mechanism for understanding the necessary
+  /// contracts.
   virtual Status Call(FunctionContext* ctx, const Datum& input, Datum* out) = 0;
 };
diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt b/cpp/src/arrow/compute/kernels/CMakeLists.txt
index 923c8c3bd4e81..4d508aacb9990 100644
--- a/cpp/src/arrow/compute/kernels/CMakeLists.txt
+++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt
@@ -15,8 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
-install(FILES
-  boolean.h
-  cast.h
-  hash.h
-  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow/compute/kernels")
+ARROW_INSTALL_ALL_HEADERS("arrow/compute/kernels")
+
+ADD_ARROW_TEST(boolean-test PREFIX "arrow-compute")
+ADD_ARROW_TEST(cast-test PREFIX "arrow-compute")
+ADD_ARROW_TEST(hash-test PREFIX "arrow-compute")
diff --git a/cpp/src/arrow/compute/kernels/boolean-test.cc b/cpp/src/arrow/compute/kernels/boolean-test.cc
new file mode 100644
index 0000000000000..5f4613367f6c5
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/boolean-test.cc
@@ -0,0 +1,169 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include + +#include + +#include "arrow/test-common.h" +#include "arrow/test-util.h" + +#include "arrow/compute/context.h" +#include "arrow/compute/kernel.h" +#include "arrow/compute/kernels/boolean.h" +#include "arrow/compute/kernels/util-internal.h" +#include "arrow/compute/test-util.h" + +using std::shared_ptr; +using std::vector; + +namespace arrow { +namespace compute { + +using BinaryKernelFunc = + std::function; + +class TestBooleanKernel : public ComputeFixture, public TestBase { + public: + void TestArrayBinary(const BinaryKernelFunc& kernel, const std::shared_ptr& left, + const std::shared_ptr& right, + const std::shared_ptr& expected) { + Datum result; + ASSERT_OK(kernel(&this->ctx_, left, right, &result)); + ASSERT_EQ(Datum::ARRAY, result.kind()); + std::shared_ptr result_array = result.make_array(); + ASSERT_TRUE(result_array->Equals(expected)); + } + + void TestChunkedArrayBinary(const BinaryKernelFunc& kernel, + const std::shared_ptr& left, + const std::shared_ptr& right, + const std::shared_ptr& expected) { + Datum result; + std::shared_ptr result_array; + ASSERT_OK(kernel(&this->ctx_, left, right, &result)); + ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind()); + std::shared_ptr result_ca = result.chunked_array(); + ASSERT_TRUE(result_ca->Equals(expected)); + } + + void TestBinaryKernel(const BinaryKernelFunc& kernel, const std::vector& values1, + const std::vector& values2, + const std::vector& values3, + const std::vector& values3_nulls) { + auto type = boolean(); + auto a1 = _MakeArray(type, values1, {}); + auto a2 = _MakeArray(type, values2, {}); + auto a3 = _MakeArray(type, values3, {}); + auto a1_nulls = _MakeArray(type, values1, values1); + auto a2_nulls = _MakeArray(type, values2, values2); + auto a3_nulls = _MakeArray(type, values3, values3_nulls); + + TestArrayBinary(kernel, a1, a2, a3); + TestArrayBinary(kernel, a1_nulls, a2_nulls, a3_nulls); + TestArrayBinary(kernel, a1->Slice(1), a2->Slice(1), a3->Slice(1)); + TestArrayBinary(kernel, a1_nulls->Slice(1), a2_nulls->Slice(1), a3_nulls->Slice(1)); + + // ChunkedArray + std::vector> ca1_arrs = {a1, a1->Slice(1)}; + auto ca1 = std::make_shared(ca1_arrs); + std::vector> ca2_arrs = {a2, a2->Slice(1)}; + auto ca2 = std::make_shared(ca2_arrs); + std::vector> ca3_arrs = {a3, a3->Slice(1)}; + auto ca3 = std::make_shared(ca3_arrs); + TestChunkedArrayBinary(kernel, ca1, ca2, ca3); + + // ChunkedArray with different chunks + std::vector> ca4_arrs = {a1->Slice(0, 1), a1->Slice(1), + a1->Slice(1, 1), a1->Slice(2)}; + auto ca4 = std::make_shared(ca4_arrs); + TestChunkedArrayBinary(kernel, ca4, ca2, ca3); + } +}; + +TEST_F(TestBooleanKernel, Invert) { + vector values1 = {true, false, true}; + vector values2 = {false, true, false}; + + auto type = boolean(); + auto a1 = _MakeArray(type, values1, {}); + auto a2 = _MakeArray(type, values2, {}); + + // Plain array + Datum result; + ASSERT_OK(Invert(&this->ctx_, a1, &result)); + ASSERT_EQ(Datum::ARRAY, result.kind()); + std::shared_ptr result_array = result.make_array(); + ASSERT_TRUE(result_array->Equals(a2)); + + // Array with offset + ASSERT_OK(Invert(&this->ctx_, a1->Slice(1), &result)); + ASSERT_EQ(Datum::ARRAY, result.kind()); + result_array = result.make_array(); + ASSERT_TRUE(result_array->Equals(a2->Slice(1))); + + // ChunkedArray + std::vector> ca1_arrs = {a1, a1->Slice(1)}; + auto ca1 = std::make_shared(ca1_arrs); + std::vector> ca2_arrs = 
{a2, a2->Slice(1)}; + auto ca2 = std::make_shared(ca2_arrs); + ASSERT_OK(Invert(&this->ctx_, ca1, &result)); + ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind()); + std::shared_ptr result_ca = result.chunked_array(); + ASSERT_TRUE(result_ca->Equals(ca2)); +} + +TEST_F(TestBooleanKernel, InvertEmptyArray) { + auto type = boolean(); + std::vector> data_buffers(2); + Datum input; + input.value = ArrayData::Make(boolean(), 0 /* length */, std::move(data_buffers), + 0 /* null_count */); + + Datum result; + ASSERT_OK(Invert(&this->ctx_, input, &result)); + ASSERT_TRUE(result.make_array()->Equals(input.make_array())); +} + +TEST_F(TestBooleanKernel, And) { + vector values1 = {true, false, true, false, true, true}; + vector values2 = {true, true, false, false, true, false}; + vector values3 = {true, false, false, false, true, false}; + TestBinaryKernel(And, values1, values2, values3, values3); +} + +TEST_F(TestBooleanKernel, Or) { + vector values1 = {true, false, true, false, true, true}; + vector values2 = {true, true, false, false, true, false}; + vector values3 = {true, true, true, false, true, true}; + vector values3_nulls = {true, false, false, false, true, false}; + TestBinaryKernel(Or, values1, values2, values3, values3_nulls); +} + +TEST_F(TestBooleanKernel, Xor) { + vector values1 = {true, false, true, false, true, true}; + vector values2 = {true, true, false, false, true, false}; + vector values3 = {false, true, true, false, false, true}; + vector values3_nulls = {true, false, false, false, true, false}; + TestBinaryKernel(Xor, values1, values2, values3, values3_nulls); +} + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/boolean.cc b/cpp/src/arrow/compute/kernels/boolean.cc index e1aa1669572d0..78ae7d49bd24f 100644 --- a/cpp/src/arrow/compute/kernels/boolean.cc +++ b/cpp/src/arrow/compute/kernels/boolean.cc @@ -43,34 +43,37 @@ namespace compute { class InvertKernel : public UnaryKernel { Status Call(FunctionContext* ctx, const Datum& input, Datum* out) override { DCHECK_EQ(Datum::ARRAY, input.kind()); + constexpr int64_t kZeroDestOffset = 0; const ArrayData& in_data = *input.array(); - ArrayData* result; - - out->value = ArrayData::Make(boolean(), in_data.length); - result = out->array().get(); + std::shared_ptr result = out->array(); + result->type = boolean(); - // Allocate or copy bitmap + // Handle validity bitmap result->null_count = in_data.null_count; - std::shared_ptr validity_bitmap = in_data.buffers[0]; - if (in_data.offset != 0) { - RETURN_NOT_OK(CopyBitmap(ctx->memory_pool(), validity_bitmap->data(), - in_data.offset, in_data.length, &validity_bitmap)); + const std::shared_ptr& validity_bitmap = in_data.buffers[0]; + if (in_data.offset != 0 && in_data.null_count > 0) { + DCHECK_LE(BitUtil::BytesForBits(in_data.length), validity_bitmap->size()); + CopyBitmap(validity_bitmap->data(), in_data.offset, in_data.length, + result->buffers[0]->mutable_data(), kZeroDestOffset); + } else { + result->buffers[0] = validity_bitmap; } - result->buffers.push_back(validity_bitmap); - - // Allocate output data buffer - std::shared_ptr data_buffer; - RETURN_NOT_OK(InvertBitmap(ctx->memory_pool(), in_data.buffers[1]->data(), - in_data.offset, in_data.length, &data_buffer)); - result->buffers.push_back(data_buffer); + // Handle output data buffer + if (in_data.length > 0) { + const Buffer& data_buffer = *in_data.buffers[1]; + DCHECK_LE(BitUtil::BytesForBits(in_data.length), data_buffer.size()); + InvertBitmap(data_buffer.data(), in_data.offset, 
in_data.length, + result->buffers[1]->mutable_data(), kZeroDestOffset); + } return Status::OK(); } }; Status Invert(FunctionContext* ctx, const Datum& value, Datum* out) { - InvertKernel kernel; + detail::PrimitiveAllocatingUnaryKernel kernel( + std::unique_ptr(new InvertKernel())); std::vector result; RETURN_NOT_OK(detail::InvokeUnaryArrayKernel(ctx, &kernel, value, &result)); diff --git a/cpp/src/arrow/compute/kernels/cast-test.cc b/cpp/src/arrow/compute/kernels/cast-test.cc new file mode 100644 index 0000000000000..c3a0df5d8a73f --- /dev/null +++ b/cpp/src/arrow/compute/kernels/cast-test.cc @@ -0,0 +1,1232 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "arrow/array.h" +#include "arrow/buffer.h" +#include "arrow/memory_pool.h" +#include "arrow/status.h" +#include "arrow/table.h" +#include "arrow/test-common.h" +#include "arrow/test-util.h" +#include "arrow/type.h" +#include "arrow/type_fwd.h" +#include "arrow/type_traits.h" +#include "arrow/util/decimal.h" + +#include "arrow/compute/context.h" +#include "arrow/compute/kernel.h" +#include "arrow/compute/kernels/cast.h" +#include "arrow/compute/kernels/hash.h" +#include "arrow/compute/kernels/util-internal.h" +#include "arrow/compute/test-util.h" + +using std::shared_ptr; +using std::vector; + +namespace arrow { +namespace compute { + +static std::vector> kNumericTypes = { + uint8(), int8(), uint16(), int16(), uint32(), + int32(), uint64(), int64(), float32(), float64()}; + +static void AssertBufferSame(const Array& left, const Array& right, int buffer_index) { + ASSERT_EQ(left.data()->buffers[buffer_index].get(), + right.data()->buffers[buffer_index].get()); +} + +class TestCast : public ComputeFixture, public TestBase { + public: + void CheckPass(const Array& input, const Array& expected, + const shared_ptr& out_type, const CastOptions& options) { + shared_ptr result; + ASSERT_OK(Cast(&ctx_, input, out_type, options, &result)); + ASSERT_ARRAYS_EQUAL(expected, *result); + } + + template + void CheckFails(const shared_ptr& in_type, const vector& in_values, + const vector& is_valid, const shared_ptr& out_type, + const CastOptions& options) { + shared_ptr input, result; + if (is_valid.size() > 0) { + ArrayFromVector(in_type, is_valid, in_values, &input); + } else { + ArrayFromVector(in_type, in_values, &input); + } + ASSERT_RAISES(Invalid, Cast(&ctx_, *input, out_type, options, &result)); + } + + void CheckZeroCopy(const Array& input, const shared_ptr& out_type) { + shared_ptr result; + ASSERT_OK(Cast(&ctx_, input, out_type, {}, &result)); + ASSERT_EQ(input.data()->buffers.size(), result->data()->buffers.size()); + for (size_t i = 0; i < input.data()->buffers.size(); ++i) { + 
AssertBufferSame(input, *result, static_cast(i)); + } + } + + template + void CheckCase(const shared_ptr& in_type, const vector& in_values, + const vector& is_valid, const shared_ptr& out_type, + const vector& out_values, const CastOptions& options) { + DCHECK_EQ(in_values.size(), out_values.size()); + shared_ptr input, expected; + if (is_valid.size() > 0) { + DCHECK_EQ(is_valid.size(), out_values.size()); + ArrayFromVector(in_type, is_valid, in_values, &input); + ArrayFromVector(out_type, is_valid, out_values, &expected); + } else { + ArrayFromVector(in_type, in_values, &input); + ArrayFromVector(out_type, out_values, &expected); + } + CheckPass(*input, *expected, out_type, options); + + // Check a sliced variant + if (input->length() > 1) { + CheckPass(*input->Slice(1), *expected->Slice(1), out_type, options); + } + } + + void CheckCaseJSON(const shared_ptr& in_type, + const shared_ptr& out_type, const std::string& in_json, + const std::string& expected_json, + const CastOptions& options = CastOptions()) { + shared_ptr input = ArrayFromJSON(in_type, in_json); + shared_ptr expected = ArrayFromJSON(out_type, expected_json); + DCHECK_EQ(input->length(), expected->length()); + CheckPass(*input, *expected, out_type, options); + + // Check a sliced variant + if (input->length() > 1) { + CheckPass(*input->Slice(1), *expected->Slice(1), out_type, options); + } + } +}; + +TEST_F(TestCast, SameTypeZeroCopy) { + shared_ptr arr = ArrayFromJSON(int32(), "[0, null, 2, 3, 4]"); + shared_ptr result; + ASSERT_OK(Cast(&this->ctx_, *arr, int32(), {}, &result)); + + AssertBufferSame(*arr, *result, 0); + AssertBufferSame(*arr, *result, 1); +} + +TEST_F(TestCast, FromBoolean) { + CastOptions options; + + vector is_valid(20, true); + is_valid[3] = false; + + vector v1(is_valid.size(), true); + vector e1(is_valid.size(), 1); + for (size_t i = 0; i < v1.size(); ++i) { + if (i % 3 == 1) { + v1[i] = false; + e1[i] = 0; + } + } + + CheckCase(boolean(), v1, is_valid, int32(), e1, + options); +} + +TEST_F(TestCast, ToBoolean) { + CastOptions options; + for (auto type : kNumericTypes) { + CheckCaseJSON(type, boolean(), "[0, null, 127, 1, 0]", + "[false, null, true, true, false]"); + } + + // Check negative numbers + CheckCaseJSON(int8(), boolean(), "[0, null, 127, -1, 0]", + "[false, null, true, true, false]"); + CheckCaseJSON(float64(), boolean(), "[0, null, 127, -1, 0]", + "[false, null, true, true, false]"); +} + +TEST_F(TestCast, ToIntUpcast) { + CastOptions options; + options.allow_int_overflow = false; + + vector is_valid = {true, false, true, true, true}; + + // int8 to int32 + vector v1 = {0, 1, 127, -1, 0}; + vector e1 = {0, 1, 127, -1, 0}; + CheckCase(int8(), v1, is_valid, int32(), e1, + options); + + // bool to int8 + vector v2 = {false, true, false, true, true}; + vector e2 = {0, 1, 0, 1, 1}; + CheckCase(boolean(), v2, is_valid, int8(), e2, + options); + + // uint8 to int16, no overflow/underrun + vector v3 = {0, 100, 200, 255, 0}; + vector e3 = {0, 100, 200, 255, 0}; + CheckCase(uint8(), v3, is_valid, int16(), e3, + options); +} + +TEST_F(TestCast, OverflowInNullSlot) { + CastOptions options; + options.allow_int_overflow = false; + + vector is_valid = {true, false, true, true, true}; + + vector v11 = {0, 70000, 2000, 1000, 0}; + vector e11 = {0, 0, 2000, 1000, 0}; + + shared_ptr expected; + ArrayFromVector(int16(), is_valid, e11, &expected); + + auto buf = Buffer::Wrap(v11.data(), v11.size()); + Int32Array tmp11(5, buf, expected->null_bitmap(), -1); + + CheckPass(tmp11, *expected, int16(), options); +} + 
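The tests on either side of this point both drive the public Cast() entry point and flip a single CastOptions field between the safe and the truncating behavior. A minimal call-site sketch of that contract, using only the Cast() overload and the allow_int_overflow option that appear in this diff (the helper name DowncastChecked and its signature are illustrative, not part of the patch):

    // Illustrative sketch, not part of this patch: checked int32 -> int16
    // downcast. With allow_int_overflow = false, a value that does not fit
    // the target type makes Cast() return Status::Invalid; setting it to
    // true instead wraps the value, as ToIntDowncastUnsafe below verifies.
    #include "arrow/array.h"
    #include "arrow/compute/context.h"
    #include "arrow/compute/kernels/cast.h"
    #include "arrow/status.h"

    arrow::Status DowncastChecked(arrow::compute::FunctionContext* ctx,
                                  const arrow::Array& values,  // int32 input
                                  std::shared_ptr<arrow::Array>* out) {
      arrow::compute::CastOptions options;
      options.allow_int_overflow = false;  // safe mode, as in ToIntDowncastSafe
      return arrow::compute::Cast(ctx, values, arrow::int16(), options, out);
    }
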
+TEST_F(TestCast, ToIntDowncastSafe) { + CastOptions options; + options.allow_int_overflow = false; + + vector is_valid = {true, false, true, true, true}; + + // int16 to uint8, no overflow/underrun + vector v1 = {0, 100, 200, 1, 2}; + vector e1 = {0, 100, 200, 1, 2}; + CheckCase(int16(), v1, is_valid, uint8(), e1, + options); + + // int16 to uint8, with overflow + vector v2 = {0, 100, 256, 0, 0}; + CheckFails(int16(), v2, is_valid, uint8(), options); + + // underflow + vector v3 = {0, 100, -1, 0, 0}; + CheckFails(int16(), v3, is_valid, uint8(), options); + + // int32 to int16, no overflow + vector v4 = {0, 1000, 2000, 1, 2}; + vector e4 = {0, 1000, 2000, 1, 2}; + CheckCase(int32(), v4, is_valid, int16(), e4, + options); + + // int32 to int16, overflow + vector v5 = {0, 1000, 2000, 70000, 0}; + CheckFails(int32(), v5, is_valid, int16(), options); + + // underflow + vector v6 = {0, 1000, 2000, -70000, 0}; + CheckFails(int32(), v6, is_valid, int16(), options); + + vector v7 = {0, 1000, 2000, -70000, 0}; + CheckFails(int32(), v7, is_valid, uint8(), options); +} + +template +std::vector UnsafeVectorCast(const std::vector& v) { + size_t n_elems = v.size(); + std::vector result(n_elems); + + for (size_t i = 0; i < v.size(); i++) result[i] = static_cast(v[i]); + + return std::move(result); +} + +TEST_F(TestCast, IntegerSignedToUnsigned) { + CastOptions options; + options.allow_int_overflow = false; + + vector is_valid = {true, false, true, true, true}; + + vector v1 = {INT32_MIN, 100, -1, UINT16_MAX, INT32_MAX}; + + // Same width + CheckFails(int32(), v1, is_valid, uint32(), options); + // Wider + CheckFails(int32(), v1, is_valid, uint64(), options); + // Narrower + CheckFails(int32(), v1, is_valid, uint16(), options); + // Fail because of overflow (instead of underflow). 
+ vector over = {0, -11, 0, UINT16_MAX + 1, INT32_MAX}; + CheckFails(int32(), over, is_valid, uint16(), options); + + options.allow_int_overflow = true; + + CheckCase( + int32(), v1, is_valid, uint32(), UnsafeVectorCast(v1), options); + CheckCase( + int32(), v1, is_valid, uint64(), UnsafeVectorCast(v1), options); + CheckCase( + int32(), v1, is_valid, uint16(), UnsafeVectorCast(v1), options); + CheckCase( + int32(), over, is_valid, uint16(), UnsafeVectorCast(over), + options); +} + +TEST_F(TestCast, IntegerUnsignedToSigned) { + CastOptions options; + options.allow_int_overflow = false; + + vector is_valid = {true, true, true}; + + vector v1 = {0, INT16_MAX + 1, UINT32_MAX}; + vector v2 = {0, INT16_MAX + 1, 2}; + // Same width + CheckFails(uint32(), v1, is_valid, int32(), options); + // Narrower + CheckFails(uint32(), v1, is_valid, int16(), options); + CheckFails(uint32(), v2, is_valid, int16(), options); + + options.allow_int_overflow = true; + + CheckCase( + uint32(), v1, is_valid, int32(), UnsafeVectorCast(v1), options); + CheckCase( + uint32(), v1, is_valid, int64(), UnsafeVectorCast(v1), options); + CheckCase( + uint32(), v1, is_valid, int16(), UnsafeVectorCast(v1), options); + CheckCase( + uint32(), v2, is_valid, int16(), UnsafeVectorCast(v2), options); +} + +TEST_F(TestCast, ToIntDowncastUnsafe) { + CastOptions options; + options.allow_int_overflow = true; + + vector is_valid = {true, false, true, true, true}; + + // int16 to uint8, no overflow/underrun + vector v1 = {0, 100, 200, 1, 2}; + vector e1 = {0, 100, 200, 1, 2}; + CheckCase(int16(), v1, is_valid, uint8(), e1, + options); + + // int16 to uint8, with overflow + vector v2 = {0, 100, 256, 0, 0}; + vector e2 = {0, 100, 0, 0, 0}; + CheckCase(int16(), v2, is_valid, uint8(), e2, + options); + + // underflow + vector v3 = {0, 100, -1, 0, 0}; + vector e3 = {0, 100, 255, 0, 0}; + CheckCase(int16(), v3, is_valid, uint8(), e3, + options); + + // int32 to int16, no overflow + vector v4 = {0, 1000, 2000, 1, 2}; + vector e4 = {0, 1000, 2000, 1, 2}; + CheckCase(int32(), v4, is_valid, int16(), e4, + options); + + // int32 to int16, overflow + // TODO(wesm): do we want to allow this? we could set to null + vector v5 = {0, 1000, 2000, 70000, 0}; + vector e5 = {0, 1000, 2000, 4464, 0}; + CheckCase(int32(), v5, is_valid, int16(), e5, + options); + + // underflow + // TODO(wesm): do we want to allow this? 
we could set overflow to null + vector v6 = {0, 1000, 2000, -70000, 0}; + vector e6 = {0, 1000, 2000, -4464, 0}; + CheckCase(int32(), v6, is_valid, int16(), e6, + options); +} + +TEST_F(TestCast, FloatingPointToInt) { + // which means allow_float_truncate == false + auto options = CastOptions::Safe(); + + vector is_valid = {true, false, true, true, true}; + vector all_valid = {true, true, true, true, true}; + + // float32 to int32 no truncation + vector v1 = {1.0, 0, 0.0, -1.0, 5.0}; + vector e1 = {1, 0, 0, -1, 5}; + CheckCase(float32(), v1, is_valid, int32(), e1, + options); + CheckCase(float32(), v1, all_valid, int32(), e1, + options); + + // float64 to int32 no truncation + vector v2 = {1.0, 0, 0.0, -1.0, 5.0}; + vector e2 = {1, 0, 0, -1, 5}; + CheckCase(float64(), v2, is_valid, int32(), e2, + options); + CheckCase(float64(), v2, all_valid, int32(), e2, + options); + + // float64 to int64 no truncation + vector v3 = {1.0, 0, 0.0, -1.0, 5.0}; + vector e3 = {1, 0, 0, -1, 5}; + CheckCase(float64(), v3, is_valid, int64(), e3, + options); + CheckCase(float64(), v3, all_valid, int64(), e3, + options); + + // float64 to int32 truncate + vector v4 = {1.5, 0, 0.5, -1.5, 5.5}; + vector e4 = {1, 0, 0, -1, 5}; + + options.allow_float_truncate = false; + CheckFails(float64(), v4, is_valid, int32(), options); + CheckFails(float64(), v4, all_valid, int32(), options); + + options.allow_float_truncate = true; + CheckCase(float64(), v4, is_valid, int32(), e4, + options); + CheckCase(float64(), v4, all_valid, int32(), e4, + options); + + // float64 to int64 truncate + vector v5 = {1.5, 0, 0.5, -1.5, 5.5}; + vector e5 = {1, 0, 0, -1, 5}; + + options.allow_float_truncate = false; + CheckFails(float64(), v5, is_valid, int64(), options); + CheckFails(float64(), v5, all_valid, int64(), options); + + options.allow_float_truncate = true; + CheckCase(float64(), v5, is_valid, int64(), e5, + options); + CheckCase(float64(), v5, all_valid, int64(), e5, + options); +} + +TEST_F(TestCast, IntToFloatingPoint) { + auto options = CastOptions::Safe(); + + vector all_valid = {true, true, true, true, true}; + vector all_invalid = {false, false, false, false, false}; + + vector v1 = {INT64_MIN, INT64_MIN + 1, 0, INT64_MAX - 1, INT64_MAX}; + CheckFails(int64(), v1, all_valid, float32(), options); + + // While it's not safe to convert, all values are null. 
+ CheckCase(int64(), v1, all_invalid, float64(), + UnsafeVectorCast(v1), + options); +} + +TEST_F(TestCast, TimestampToTimestamp) { + CastOptions options; + + auto CheckTimestampCast = + [this](const CastOptions& options, TimeUnit::type from_unit, TimeUnit::type to_unit, + const vector& from_values, const vector& to_values, + const vector& is_valid) { + CheckCase( + timestamp(from_unit), from_values, is_valid, timestamp(to_unit), to_values, + options); + }; + + vector is_valid = {true, false, true, true, true}; + + // Multiply promotions + vector v1 = {0, 100, 200, 1, 2}; + vector e1 = {0, 100000, 200000, 1000, 2000}; + CheckTimestampCast(options, TimeUnit::SECOND, TimeUnit::MILLI, v1, e1, is_valid); + + vector v2 = {0, 100, 200, 1, 2}; + vector e2 = {0, 100000000L, 200000000L, 1000000, 2000000}; + CheckTimestampCast(options, TimeUnit::SECOND, TimeUnit::MICRO, v2, e2, is_valid); + + vector v3 = {0, 100, 200, 1, 2}; + vector e3 = {0, 100000000000L, 200000000000L, 1000000000L, 2000000000L}; + CheckTimestampCast(options, TimeUnit::SECOND, TimeUnit::NANO, v3, e3, is_valid); + + vector v4 = {0, 100, 200, 1, 2}; + vector e4 = {0, 100000, 200000, 1000, 2000}; + CheckTimestampCast(options, TimeUnit::MILLI, TimeUnit::MICRO, v4, e4, is_valid); + + vector v5 = {0, 100, 200, 1, 2}; + vector e5 = {0, 100000000L, 200000000L, 1000000, 2000000}; + CheckTimestampCast(options, TimeUnit::MILLI, TimeUnit::NANO, v5, e5, is_valid); + + vector v6 = {0, 100, 200, 1, 2}; + vector e6 = {0, 100000, 200000, 1000, 2000}; + CheckTimestampCast(options, TimeUnit::MICRO, TimeUnit::NANO, v6, e6, is_valid); + + // Zero copy + vector v7 = {0, 70000, 2000, 1000, 0}; + shared_ptr arr; + ArrayFromVector(timestamp(TimeUnit::SECOND), is_valid, v7, + &arr); + CheckZeroCopy(*arr, timestamp(TimeUnit::SECOND)); + + // ARROW-1773, cast to integer + CheckZeroCopy(*arr, int64()); + + // Divide, truncate + vector v8 = {0, 100123, 200456, 1123, 2456}; + vector e8 = {0, 100, 200, 1, 2}; + + options.allow_time_truncate = true; + CheckTimestampCast(options, TimeUnit::MILLI, TimeUnit::SECOND, v8, e8, is_valid); + CheckTimestampCast(options, TimeUnit::MICRO, TimeUnit::MILLI, v8, e8, is_valid); + CheckTimestampCast(options, TimeUnit::NANO, TimeUnit::MICRO, v8, e8, is_valid); + + vector v9 = {0, 100123000, 200456000, 1123000, 2456000}; + vector e9 = {0, 100, 200, 1, 2}; + CheckTimestampCast(options, TimeUnit::MICRO, TimeUnit::SECOND, v9, e9, is_valid); + CheckTimestampCast(options, TimeUnit::NANO, TimeUnit::MILLI, v9, e9, is_valid); + + vector v10 = {0, 100123000000L, 200456000000L, 1123000000L, 2456000000}; + vector e10 = {0, 100, 200, 1, 2}; + CheckTimestampCast(options, TimeUnit::NANO, TimeUnit::SECOND, v10, e10, is_valid); + + // Disallow truncate, failures + options.allow_time_truncate = false; + CheckFails(timestamp(TimeUnit::MILLI), v8, is_valid, + timestamp(TimeUnit::SECOND), options); + CheckFails(timestamp(TimeUnit::MICRO), v8, is_valid, + timestamp(TimeUnit::MILLI), options); + CheckFails(timestamp(TimeUnit::NANO), v8, is_valid, + timestamp(TimeUnit::MICRO), options); + CheckFails(timestamp(TimeUnit::MICRO), v9, is_valid, + timestamp(TimeUnit::SECOND), options); + CheckFails(timestamp(TimeUnit::NANO), v9, is_valid, + timestamp(TimeUnit::MILLI), options); + CheckFails(timestamp(TimeUnit::NANO), v10, is_valid, + timestamp(TimeUnit::SECOND), options); +} + +TEST_F(TestCast, TimestampToDate32_Date64) { + CastOptions options; + + vector is_valid = {true, true, false}; + + // 2000-01-01, 2000-01-02, null + vector v_nano = 
{946684800000000000, 946771200000000000, 0}; + vector v_micro = {946684800000000, 946771200000000, 0}; + vector v_milli = {946684800000, 946771200000, 0}; + vector v_second = {946684800, 946771200, 0}; + vector v_day = {10957, 10958, 0}; + + // Simple conversions + CheckCase( + timestamp(TimeUnit::NANO), v_nano, is_valid, date64(), v_milli, options); + CheckCase( + timestamp(TimeUnit::MICRO), v_micro, is_valid, date64(), v_milli, options); + CheckCase( + timestamp(TimeUnit::MILLI), v_milli, is_valid, date64(), v_milli, options); + CheckCase( + timestamp(TimeUnit::SECOND), v_second, is_valid, date64(), v_milli, options); + + CheckCase( + timestamp(TimeUnit::NANO), v_nano, is_valid, date32(), v_day, options); + CheckCase( + timestamp(TimeUnit::MICRO), v_micro, is_valid, date32(), v_day, options); + CheckCase( + timestamp(TimeUnit::MILLI), v_milli, is_valid, date32(), v_day, options); + CheckCase( + timestamp(TimeUnit::SECOND), v_second, is_valid, date32(), v_day, options); + + // Disallow truncate, failures + vector v_nano_fail = {946684800000000001, 946771200000000001, 0}; + vector v_micro_fail = {946684800000001, 946771200000001, 0}; + vector v_milli_fail = {946684800001, 946771200001, 0}; + vector v_second_fail = {946684801, 946771201, 0}; + + options.allow_time_truncate = false; + CheckFails(timestamp(TimeUnit::NANO), v_nano_fail, is_valid, date64(), + options); + CheckFails(timestamp(TimeUnit::MICRO), v_micro_fail, is_valid, date64(), + options); + CheckFails(timestamp(TimeUnit::MILLI), v_milli_fail, is_valid, date64(), + options); + CheckFails(timestamp(TimeUnit::SECOND), v_second_fail, is_valid, + date64(), options); + + CheckFails(timestamp(TimeUnit::NANO), v_nano_fail, is_valid, date32(), + options); + CheckFails(timestamp(TimeUnit::MICRO), v_micro_fail, is_valid, date32(), + options); + CheckFails(timestamp(TimeUnit::MILLI), v_milli_fail, is_valid, date32(), + options); + CheckFails(timestamp(TimeUnit::SECOND), v_second_fail, is_valid, + date32(), options); + + // Make sure that nulls are excluded from the truncation checks + vector v_second_nofail = {946684800, 946771200, 1}; + CheckCase( + timestamp(TimeUnit::SECOND), v_second_nofail, is_valid, date64(), v_milli, options); + CheckCase( + timestamp(TimeUnit::SECOND), v_second_nofail, is_valid, date32(), v_day, options); +} + +TEST_F(TestCast, TimeToCompatible) { + CastOptions options; + + vector is_valid = {true, false, true, true, true}; + + // Multiply promotions + vector v1 = {0, 100, 200, 1, 2}; + vector e1 = {0, 100000, 200000, 1000, 2000}; + CheckCase( + time32(TimeUnit::SECOND), v1, is_valid, time32(TimeUnit::MILLI), e1, options); + + vector v2 = {0, 100, 200, 1, 2}; + vector e2 = {0, 100000000L, 200000000L, 1000000, 2000000}; + CheckCase( + time32(TimeUnit::SECOND), v2, is_valid, time64(TimeUnit::MICRO), e2, options); + + vector v3 = {0, 100, 200, 1, 2}; + vector e3 = {0, 100000000000L, 200000000000L, 1000000000L, 2000000000L}; + CheckCase( + time32(TimeUnit::SECOND), v3, is_valid, time64(TimeUnit::NANO), e3, options); + + vector v4 = {0, 100, 200, 1, 2}; + vector e4 = {0, 100000, 200000, 1000, 2000}; + CheckCase( + time32(TimeUnit::MILLI), v4, is_valid, time64(TimeUnit::MICRO), e4, options); + + vector v5 = {0, 100, 200, 1, 2}; + vector e5 = {0, 100000000L, 200000000L, 1000000, 2000000}; + CheckCase( + time32(TimeUnit::MILLI), v5, is_valid, time64(TimeUnit::NANO), e5, options); + + vector v6 = {0, 100, 200, 1, 2}; + vector e6 = {0, 100000, 200000, 1000, 2000}; + CheckCase( + time64(TimeUnit::MICRO), v6, is_valid, 
time64(TimeUnit::NANO), e6, options); + + // Zero copy + vector v7 = {0, 70000, 2000, 1000, 0}; + shared_ptr arr; + ArrayFromVector(time64(TimeUnit::MICRO), is_valid, v7, &arr); + CheckZeroCopy(*arr, time64(TimeUnit::MICRO)); + + // ARROW-1773: cast to int64 + CheckZeroCopy(*arr, int64()); + + vector v7_2 = {0, 70000, 2000, 1000, 0}; + ArrayFromVector(time32(TimeUnit::SECOND), is_valid, v7_2, &arr); + CheckZeroCopy(*arr, time32(TimeUnit::SECOND)); + + // ARROW-1773: cast to int64 + CheckZeroCopy(*arr, int32()); + + // Divide, truncate + vector v8 = {0, 100123, 200456, 1123, 2456}; + vector e8 = {0, 100, 200, 1, 2}; + + options.allow_time_truncate = true; + CheckCase( + time32(TimeUnit::MILLI), v8, is_valid, time32(TimeUnit::SECOND), e8, options); + CheckCase( + time64(TimeUnit::MICRO), v8, is_valid, time32(TimeUnit::MILLI), e8, options); + CheckCase( + time64(TimeUnit::NANO), v8, is_valid, time64(TimeUnit::MICRO), e8, options); + + vector v9 = {0, 100123000, 200456000, 1123000, 2456000}; + vector e9 = {0, 100, 200, 1, 2}; + CheckCase( + time64(TimeUnit::MICRO), v9, is_valid, time32(TimeUnit::SECOND), e9, options); + CheckCase( + time64(TimeUnit::NANO), v9, is_valid, time32(TimeUnit::MILLI), e9, options); + + vector v10 = {0, 100123000000L, 200456000000L, 1123000000L, 2456000000}; + vector e10 = {0, 100, 200, 1, 2}; + CheckCase( + time64(TimeUnit::NANO), v10, is_valid, time32(TimeUnit::SECOND), e10, options); + + // Disallow truncate, failures + + options.allow_time_truncate = false; + CheckFails(time32(TimeUnit::MILLI), v8, is_valid, time32(TimeUnit::SECOND), + options); + CheckFails(time64(TimeUnit::MICRO), v8, is_valid, time32(TimeUnit::MILLI), + options); + CheckFails(time64(TimeUnit::NANO), v8, is_valid, time64(TimeUnit::MICRO), + options); + CheckFails(time64(TimeUnit::MICRO), v9, is_valid, time32(TimeUnit::SECOND), + options); + CheckFails(time64(TimeUnit::NANO), v9, is_valid, time32(TimeUnit::MILLI), + options); + CheckFails(time64(TimeUnit::NANO), v10, is_valid, time32(TimeUnit::SECOND), + options); +} + +TEST_F(TestCast, DateToCompatible) { + CastOptions options; + + vector is_valid = {true, false, true, true, true}; + + constexpr int64_t F = 86400000; + + // Multiply promotion + vector v1 = {0, 100, 200, 1, 2}; + vector e1 = {0, 100 * F, 200 * F, F, 2 * F}; + CheckCase(date32(), v1, is_valid, date64(), + e1, options); + + // Zero copy + vector v2 = {0, 70000, 2000, 1000, 0}; + vector v3 = {0, 70000, 2000, 1000, 0}; + shared_ptr arr; + ArrayFromVector(date32(), is_valid, v2, &arr); + CheckZeroCopy(*arr, date32()); + + // ARROW-1773: zero copy cast to integer + CheckZeroCopy(*arr, int32()); + + ArrayFromVector(date64(), is_valid, v3, &arr); + CheckZeroCopy(*arr, date64()); + + // ARROW-1773: zero copy cast to integer + CheckZeroCopy(*arr, int64()); + + // Divide, truncate + vector v8 = {0, 100 * F + 123, 200 * F + 456, F + 123, 2 * F + 456}; + vector e8 = {0, 100, 200, 1, 2}; + + options.allow_time_truncate = true; + CheckCase(date64(), v8, is_valid, date32(), + e8, options); + + // Disallow truncate, failures + options.allow_time_truncate = false; + CheckFails(date64(), v8, is_valid, date32(), options); +} + +TEST_F(TestCast, ToDouble) { + CastOptions options; + vector is_valid = {true, false, true, true, true}; + + // int16 to double + vector v1 = {0, 100, 200, 1, 2}; + vector e1 = {0, 100, 200, 1, 2}; + CheckCase(int16(), v1, is_valid, float64(), e1, + options); + + // float to double + vector v2 = {0, 100, 200, 1, 2}; + vector e2 = {0, 100, 200, 1, 2}; + CheckCase(float32(), 
v2, is_valid, float64(), e2, + options); + + // bool to double + vector v3 = {true, true, false, false, true}; + vector e3 = {1, 1, 0, 0, 1}; + CheckCase(boolean(), v3, is_valid, float64(), e3, + options); +} + +TEST_F(TestCast, ChunkedArray) { + vector values1 = {0, 1, 2}; + vector values2 = {3, 4, 5}; + + auto type = int16(); + auto out_type = int64(); + + auto a1 = _MakeArray(type, values1, {}); + auto a2 = _MakeArray(type, values2, {}); + + ArrayVector arrays = {a1, a2}; + auto carr = std::make_shared(arrays); + + CastOptions options; + + Datum out; + ASSERT_OK(Cast(&this->ctx_, carr, out_type, options, &out)); + ASSERT_EQ(Datum::CHUNKED_ARRAY, out.kind()); + + auto out_carr = out.chunked_array(); + + vector ex_values1 = {0, 1, 2}; + vector ex_values2 = {3, 4, 5}; + auto a3 = _MakeArray(out_type, ex_values1, {}); + auto a4 = _MakeArray(out_type, ex_values2, {}); + + ArrayVector ex_arrays = {a3, a4}; + auto ex_carr = std::make_shared(ex_arrays); + + ASSERT_TRUE(out.chunked_array()->Equals(*ex_carr)); +} + +TEST_F(TestCast, UnsupportedTarget) { + vector is_valid = {true, false, true, true, true}; + vector v1 = {0, 1, 2, 3, 4}; + + shared_ptr arr; + ArrayFromVector(int32(), is_valid, v1, &arr); + + shared_ptr result; + ASSERT_RAISES(NotImplemented, Cast(&this->ctx_, *arr, utf8(), {}, &result)); +} + +TEST_F(TestCast, DateTimeZeroCopy) { + vector is_valid = {true, false, true, true, true}; + + vector v1 = {0, 70000, 2000, 1000, 0}; + shared_ptr arr; + ArrayFromVector(int32(), is_valid, v1, &arr); + + CheckZeroCopy(*arr, time32(TimeUnit::SECOND)); + CheckZeroCopy(*arr, date32()); + + vector v2 = {0, 70000, 2000, 1000, 0}; + ArrayFromVector(int64(), is_valid, v2, &arr); + + CheckZeroCopy(*arr, time64(TimeUnit::MICRO)); + CheckZeroCopy(*arr, date64()); + CheckZeroCopy(*arr, timestamp(TimeUnit::NANO)); +} + +TEST_F(TestCast, FromNull) { + // Null casts to everything + const int length = 10; + + NullArray arr(length); + + shared_ptr result; + ASSERT_OK(Cast(&ctx_, arr, int32(), {}, &result)); + + ASSERT_EQ(length, result->length()); + ASSERT_EQ(length, result->null_count()); + + // OK to look at bitmaps + ASSERT_ARRAYS_EQUAL(*result, *result); +} + +TEST_F(TestCast, PreallocatedMemory) { + CastOptions options; + options.allow_int_overflow = false; + + vector is_valid = {true, false, true, true, true}; + + const int64_t length = 5; + + shared_ptr arr; + vector v1 = {0, 70000, 2000, 1000, 0}; + vector e1 = {0, 70000, 2000, 1000, 0}; + ArrayFromVector(int32(), is_valid, v1, &arr); + + auto out_type = int64(); + + std::unique_ptr kernel; + ASSERT_OK(GetCastFunction(*int32(), out_type, options, &kernel)); + + auto out_data = ArrayData::Make(out_type, length); + + shared_ptr out_values; + ASSERT_OK(this->ctx_.Allocate(length * sizeof(int64_t), &out_values)); + + out_data->buffers.push_back(nullptr); + out_data->buffers.push_back(out_values); + + Datum out(out_data); + ASSERT_OK(kernel->Call(&this->ctx_, arr, &out)); + + // Buffer address unchanged + ASSERT_EQ(out_values.get(), out_data->buffers[1].get()); + + shared_ptr result = MakeArray(out_data); + shared_ptr expected; + ArrayFromVector(int64(), is_valid, e1, &expected); + + ASSERT_ARRAYS_EQUAL(*expected, *result); +} + +template +void CheckOffsetOutputCase(FunctionContext* ctx, const std::shared_ptr& in_type, + const vector& in_values, + const std::shared_ptr& out_type, + const vector& out_values) { + using OutTraits = TypeTraits; + + CastOptions options; + + const int64_t length = static_cast(in_values.size()); + + shared_ptr arr, expected; + 
ArrayFromVector(in_type, in_values, &arr); + ArrayFromVector(out_type, out_values, &expected); + + shared_ptr out_buffer; + ASSERT_OK(ctx->Allocate(OutTraits::bytes_required(length), &out_buffer)); + + std::unique_ptr kernel; + ASSERT_OK(GetCastFunction(*in_type, out_type, options, &kernel)); + + const int64_t first_half = length / 2; + + auto out_data = ArrayData::Make(out_type, length, {nullptr, out_buffer}); + auto out_second_data = out_data->Copy(); + out_second_data->offset = first_half; + + Datum out_first(out_data); + Datum out_second(out_second_data); + + // Cast each bit + ASSERT_OK(kernel->Call(ctx, arr->Slice(0, first_half), &out_first)); + ASSERT_OK(kernel->Call(ctx, arr->Slice(first_half), &out_second)); + + shared_ptr result = MakeArray(out_data); + + ASSERT_ARRAYS_EQUAL(*expected, *result); +} + +TEST_F(TestCast, OffsetOutputBuffer) { + // ARROW-1735 + vector v1 = {0, 10000, 2000, 1000, 0}; + vector e1 = {0, 10000, 2000, 1000, 0}; + + auto in_type = int32(); + auto out_type = int64(); + CheckOffsetOutputCase(&this->ctx_, in_type, v1, + out_type, e1); + + vector e2 = {false, true, true, true, false}; + + out_type = boolean(); + CheckOffsetOutputCase(&this->ctx_, in_type, v1, + boolean(), e2); + + vector e3 = {0, 10000, 2000, 1000, 0}; + CheckOffsetOutputCase(&this->ctx_, in_type, v1, + int16(), e3); +} + +TEST_F(TestCast, StringToBoolean) { + CastOptions options; + + vector is_valid = {true, false, true, true, true}; + + vector v1 = {"False", "true", "true", "True", "false"}; + vector v2 = {"0", "1", "1", "1", "0"}; + vector e = {false, true, true, true, false}; + CheckCase(utf8(), v1, is_valid, boolean(), + e, options); + CheckCase(utf8(), v2, is_valid, boolean(), + e, options); +} + +TEST_F(TestCast, StringToBooleanErrors) { + CastOptions options; + + vector is_valid = {true}; + + CheckFails(utf8(), {"false "}, is_valid, boolean(), options); + CheckFails(utf8(), {"T"}, is_valid, boolean(), options); +} + +TEST_F(TestCast, StringToNumber) { + CastOptions options; + + vector is_valid = {true, false, true, true, true}; + + // string to int + vector v_int = {"0", "1", "127", "-1", "0"}; + vector e_int8 = {0, 1, 127, -1, 0}; + vector e_int16 = {0, 1, 127, -1, 0}; + vector e_int32 = {0, 1, 127, -1, 0}; + vector e_int64 = {0, 1, 127, -1, 0}; + CheckCase(utf8(), v_int, is_valid, int8(), + e_int8, options); + CheckCase(utf8(), v_int, is_valid, int16(), + e_int16, options); + CheckCase(utf8(), v_int, is_valid, int32(), + e_int32, options); + CheckCase(utf8(), v_int, is_valid, int64(), + e_int64, options); + + v_int = {"2147483647", "0", "-2147483648", "0", "0"}; + e_int32 = {2147483647, 0, -2147483648LL, 0, 0}; + CheckCase(utf8(), v_int, is_valid, int32(), + e_int32, options); + v_int = {"9223372036854775807", "0", "-9223372036854775808", "0", "0"}; + e_int64 = {9223372036854775807LL, 0, (-9223372036854775807LL - 1), 0, 0}; + CheckCase(utf8(), v_int, is_valid, int64(), + e_int64, options); + + // string to uint + vector v_uint = {"0", "1", "127", "255", "0"}; + vector e_uint8 = {0, 1, 127, 255, 0}; + vector e_uint16 = {0, 1, 127, 255, 0}; + vector e_uint32 = {0, 1, 127, 255, 0}; + vector e_uint64 = {0, 1, 127, 255, 0}; + CheckCase(utf8(), v_uint, is_valid, + uint8(), e_uint8, options); + CheckCase(utf8(), v_uint, is_valid, + uint16(), e_uint16, options); + CheckCase(utf8(), v_uint, is_valid, + uint32(), e_uint32, options); + CheckCase(utf8(), v_uint, is_valid, + uint64(), e_uint64, options); + + v_uint = {"4294967295", "0", "0", "0", "0"}; + e_uint32 = {4294967295, 0, 0, 0, 0}; + 
CheckCase(utf8(), v_uint, is_valid, + uint32(), e_uint32, options); + v_uint = {"18446744073709551615", "0", "0", "0", "0"}; + e_uint64 = {18446744073709551615ULL, 0, 0, 0, 0}; + CheckCase(utf8(), v_uint, is_valid, + uint64(), e_uint64, options); + + // string to float + vector v_float = {"0.1", "1.2", "127.3", "200.4", "0.5"}; + vector e_float = {0.1f, 1.2f, 127.3f, 200.4f, 0.5f}; + vector e_double = {0.1, 1.2, 127.3, 200.4, 0.5}; + CheckCase(utf8(), v_float, is_valid, + float32(), e_float, options); + CheckCase(utf8(), v_float, is_valid, + float64(), e_double, options); + + // Test that casting is locale-independent + auto global_locale = std::locale(); + try { + // French locale uses the comma as decimal point + std::locale::global(std::locale("fr_FR.UTF-8")); + } catch (std::runtime_error&) { + // Locale unavailable, ignore + } + CheckCase(utf8(), v_float, is_valid, + float32(), e_float, options); + CheckCase(utf8(), v_float, is_valid, + float64(), e_double, options); + std::locale::global(global_locale); +} + +TEST_F(TestCast, StringToNumberErrors) { + CastOptions options; + + vector is_valid = {true}; + + CheckFails(utf8(), {"z"}, is_valid, int8(), options); + CheckFails(utf8(), {"12 z"}, is_valid, int8(), options); + CheckFails(utf8(), {"128"}, is_valid, int8(), options); + CheckFails(utf8(), {"-129"}, is_valid, int8(), options); + CheckFails(utf8(), {"0.5"}, is_valid, int8(), options); + + CheckFails(utf8(), {"256"}, is_valid, uint8(), options); + CheckFails(utf8(), {"-1"}, is_valid, uint8(), options); + + CheckFails(utf8(), {"z"}, is_valid, float32(), options); +} + +TEST_F(TestCast, StringToTimestamp) { + CastOptions options; + + vector is_valid = {true, false, true}; + vector strings = {"1970-01-01", "xxx", "2000-02-29"}; + + auto type = timestamp(TimeUnit::SECOND); + vector e = {0, 0, 951782400}; + CheckCase(utf8(), strings, is_valid, + type, e, options); + + type = timestamp(TimeUnit::MICRO); + e = {0, 0, 951782400000000LL}; + CheckCase(utf8(), strings, is_valid, + type, e, options); + + // NOTE: timestamp parsing is tested comprehensively in parsing-util-test.cc +} + +TEST_F(TestCast, StringToTimestampErrors) { + CastOptions options; + + vector is_valid = {true}; + + for (auto unit : {TimeUnit::SECOND, TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO}) { + auto type = timestamp(unit); + CheckFails(utf8(), {""}, is_valid, type, options); + CheckFails(utf8(), {"xxx"}, is_valid, type, options); + } +} + +constexpr const char* kInvalidUtf8 = "\xa0\xa1"; + +TEST_F(TestCast, BinaryToString) { + CastOptions options; + + // All valid except the last one + vector all = {1, 1, 1, 1, 1}; + vector valid = {1, 1, 1, 1, 0}; + vector strings = {"Hi", "olá mundo", "你好世界", "", kInvalidUtf8}; + + std::shared_ptr array; + + // Should accept when invalid but null. 
+ ArrayFromVector(binary(), valid, strings, &array); + CheckZeroCopy(*array, utf8()); + + // Should refuse due to invalid utf8 payload + CheckFails(binary(), strings, all, utf8(), options); + + // Should accept due to option override + options.allow_invalid_utf8 = true; + CheckCase(binary(), strings, all, + utf8(), strings, options); +} + +template +class TestDictionaryCast : public TestCast {}; + +typedef ::testing::Types + TestTypes; + +TYPED_TEST_CASE(TestDictionaryCast, TestTypes); + +TYPED_TEST(TestDictionaryCast, Basic) { + CastOptions options; + shared_ptr plain_array = + TestBase::MakeRandomArray::ArrayType>(10, 2); + + Datum out; + ASSERT_OK(DictionaryEncode(&this->ctx_, plain_array->data(), &out)); + + this->CheckPass(*MakeArray(out.array()), *plain_array, plain_array->type(), options); +} + +TEST_F(TestCast, DictToNonDictNoNulls) { + vector dict_values = {"foo", "bar", "baz"}; + auto ex_dict = _MakeArray(utf8(), dict_values, {}); + auto dict_type = dictionary(int32(), ex_dict); + + // Explicitly construct with nullptr for the null_bitmap_data + std::vector i1 = {1, 0, 1}; + std::vector i2 = {2, 1, 0, 1}; + auto c1 = std::make_shared>(3, Buffer::Wrap(i1)); + auto c2 = std::make_shared>(4, Buffer::Wrap(i2)); + + ArrayVector dict_arrays = {std::make_shared(dict_type, c1), + std::make_shared(dict_type, c2)}; + auto dict_carr = std::make_shared(dict_arrays); + + Datum cast_input(dict_carr); + Datum cast_output; + // Ensure that casting works even when the null_bitmap_data array is a nullptr + ASSERT_OK(Cast(&this->ctx_, cast_input, + static_cast(*dict_type).dictionary()->type(), + CastOptions(), &cast_output)); + ASSERT_EQ(Datum::CHUNKED_ARRAY, cast_output.kind()); + + auto e1 = _MakeArray(utf8(), {"bar", "foo", "bar"}, {}); + auto e2 = _MakeArray(utf8(), {"baz", "bar", "foo", "bar"}, {}); + + auto chunks = cast_output.chunked_array()->chunks(); + ASSERT_EQ(chunks.size(), 2); + ASSERT_ARRAYS_EQUAL(*e1, *chunks[0]); + ASSERT_ARRAYS_EQUAL(*e2, *chunks[1]); +} + +/*TYPED_TEST(TestDictionaryCast, Reverse) { + CastOptions options; + shared_ptr plain_array = + TestBase::MakeRandomArray::ArrayType>(10, 2); + + shared_ptr dict_array; + ASSERT_OK(EncodeArrayToDictionary(*plain_array, this->pool_, &dict_array)); + + this->CheckPass(*plain_array, *dict_array, dict_array->type(), options); +}*/ + +TEST_F(TestCast, ListToList) { + CastOptions options; + std::shared_ptr offsets; + + vector offsets_values = {0, 1, 2, 5, 7, 7, 8, 10}; + std::vector offsets_is_valid = {true, true, true, true, false, true, true, true}; + ArrayFromVector(offsets_is_valid, offsets_values, &offsets); + + shared_ptr int32_plain_array = + TestBase::MakeRandomArray::ArrayType>(10, 2); + std::shared_ptr int32_list_array; + ASSERT_OK( + ListArray::FromArrays(*offsets, *int32_plain_array, pool_, &int32_list_array)); + + std::shared_ptr int64_plain_array; + ASSERT_OK(Cast(&this->ctx_, *int32_plain_array, int64(), options, &int64_plain_array)); + std::shared_ptr int64_list_array; + ASSERT_OK( + ListArray::FromArrays(*offsets, *int64_plain_array, pool_, &int64_list_array)); + + std::shared_ptr float64_plain_array; + ASSERT_OK( + Cast(&this->ctx_, *int32_plain_array, float64(), options, &float64_plain_array)); + std::shared_ptr float64_list_array; + ASSERT_OK( + ListArray::FromArrays(*offsets, *float64_plain_array, pool_, &float64_list_array)); + + CheckPass(*int32_list_array, *int64_list_array, int64_list_array->type(), options); + CheckPass(*int32_list_array, *float64_list_array, float64_list_array->type(), options); + 
CheckPass(*int64_list_array, *int32_list_array, int32_list_array->type(), options); + CheckPass(*int64_list_array, *float64_list_array, float64_list_array->type(), options); + + options.allow_float_truncate = true; + CheckPass(*float64_list_array, *int32_list_array, int32_list_array->type(), options); + CheckPass(*float64_list_array, *int64_list_array, int64_list_array->type(), options); +} + +TEST_F(TestCast, IdentityCasts) { + // ARROW-4102 + auto CheckIdentityCast = [this](std::shared_ptr type, + const std::string& json) { + auto arr = ArrayFromJSON(type, json); + CheckZeroCopy(*arr, type); + }; + + CheckIdentityCast(null(), "[null, null, null]"); + CheckIdentityCast(boolean(), "[false, true, null, false]"); + + for (auto type : kNumericTypes) { + CheckIdentityCast(type, "[1, 2, null, 4]"); + } + CheckIdentityCast(binary(), "[\"foo\", \"bar\"]"); + CheckIdentityCast(utf8(), "[\"foo\", \"bar\"]"); + CheckIdentityCast(fixed_size_binary(3), "[\"foo\", \"bar\"]"); + + CheckIdentityCast(list(int8()), "[[1, 2], [null], [], [3]]"); + + CheckIdentityCast(time32(TimeUnit::MILLI), "[1, 2, 3, 4]"); + CheckIdentityCast(time64(TimeUnit::MICRO), "[1, 2, 3, 4]"); + CheckIdentityCast(date32(), "[1, 2, 3, 4]"); + CheckIdentityCast(date64(), "[86400000, 0]"); + CheckIdentityCast(timestamp(TimeUnit::SECOND), "[1, 2, 3, 4]"); + + { + auto dict_type = dictionary(int8(), ArrayFromJSON(int8(), "[1, 2, 3]")); + auto dict_indices = ArrayFromJSON(int8(), "[0, 1, 2, 0, null, 2]"); + auto dict_array = std::make_shared(dict_type, dict_indices); + CheckZeroCopy(*dict_array, dict_type); + } +} + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/cast.cc b/cpp/src/arrow/compute/kernels/cast.cc index cd45b2d35275e..74ee7d6d110f4 100644 --- a/cpp/src/arrow/compute/kernels/cast.cc +++ b/cpp/src/arrow/compute/kernels/cast.cc @@ -37,6 +37,7 @@ #include "arrow/util/logging.h" #include "arrow/util/macros.h" #include "arrow/util/parsing.h" // IWYU pragma: keep +#include "arrow/util/utf8.h" #include "arrow/compute/context.h" #include "arrow/compute/kernel.h" @@ -77,6 +78,19 @@ namespace compute { constexpr int64_t kMillisecondsInDay = 86400000; +template +struct is_binary_to_string { + static constexpr bool value = false; +}; + +template +struct is_binary_to_string< + O, I, + typename std::enable_if::value && + std::is_base_of::value>::type> { + static constexpr bool value = true; +}; + // ---------------------------------------------------------------------- // Zero copy casts @@ -85,6 +99,8 @@ struct is_zero_copy_cast { static constexpr bool value = false; }; +// TODO(wesm): ARROW-4110; this is no longer needed, but may be useful if we +// ever _do_ want to generate identity cast kernels at compile time template struct is_zero_copy_cast< O, I, @@ -112,15 +128,30 @@ struct is_zero_copy_cast< static constexpr bool value = sizeof(O_T) == sizeof(I_T); }; +// Binary to String doesn't require copying, the payload only needs to be +// validated. +template +struct is_zero_copy_cast< + O, I, + typename std::enable_if::value && + is_binary_to_string::value>::type> { + static constexpr bool value = true; +}; + template struct CastFunctor {}; // Indicated no computation required +// +// The case BinaryType -> StringType is special cased due to validation +// requirements. 
template -struct CastFunctor::value>::type> { +struct CastFunctor::value && + !is_binary_to_string::value>::type> { void operator()(FunctionContext* ctx, const CastOptions& options, const ArrayData& input, ArrayData* output) { - CopyData(input, output); + ZeroCopyData(input, output); } }; @@ -373,6 +404,7 @@ struct is_float_truncate< template struct CastFunctor::value>::type> { + ARROW_DISABLE_UBSAN("float-cast-overflow") void operator()(FunctionContext* ctx, const CastOptions& options, const ArrayData& input, ArrayData* output) { using in_type = typename I::c_type; @@ -479,11 +511,9 @@ void ShiftTime(FunctionContext* ctx, const CastOptions& options, const bool is_m out_data[i] = static_cast(in_data[i] / factor); } } else { -#define RAISE_INVALID_CAST(VAL) \ - std::stringstream ss; \ - ss << "Casting from " << input.type->ToString() << " to " << output->type->ToString() \ - << " would lose data: " << VAL; \ - ctx->SetStatus(Status::Invalid(ss.str())); +#define RAISE_INVALID_CAST(VAL) \ + ctx->SetStatus(Status::Invalid("Casting from ", input.type->ToString(), " to ", \ + output->type->ToString(), " would lose data: ", VAL)); if (input.null_count != 0) { internal::BitmapReader bit_reader(input.buffers[0]->data(), input.offset, @@ -532,7 +562,7 @@ struct CastFunctor { const auto& out_type = checked_cast(*output->type); if (in_type.unit() == out_type.unit()) { - CopyData(input, output); + ZeroCopyData(input, output); return; } @@ -625,7 +655,7 @@ struct CastFunctor(*output->type); if (in_type.unit() == out_type.unit()) { - CopyData(input, output); + ZeroCopyData(input, output); return; } @@ -766,9 +796,8 @@ struct CastFunctor< UnpackFixedSizeBinaryDictionary(ctx, indices, dictionary, output); break; default: - std::stringstream ss; - ss << "Invalid index type: " << indices.type()->ToString(); - ctx->SetStatus(Status::Invalid(ss.str())); + ctx->SetStatus( + Status::Invalid("Invalid index type: ", indices.type()->ToString())); return; } } @@ -845,9 +874,8 @@ struct CastFunctor(ctx, indices, dictionary, output))); break; default: - std::stringstream ss; - ss << "Invalid index type: " << indices.type()->ToString(); - ctx->SetStatus(Status::Invalid(ss.str())); + ctx->SetStatus( + Status::Invalid("Invalid index type: ", indices.type()->ToString())); return; } } @@ -903,9 +931,8 @@ struct CastFunctor(indices, dictionary, out); break; default: - std::stringstream ss; - ss << "Invalid index type: " << indices.type()->ToString(); - ctx->SetStatus(Status::Invalid(ss.str())); + ctx->SetStatus( + Status::Invalid("Invalid index type: ", indices.type()->ToString())); return; } } @@ -931,9 +958,8 @@ struct CastFunctor> { auto str = input_array.GetView(i); if (!converter(str.data(), str.length(), out_data)) { - std::stringstream ss; - ss << "Failed to cast String '" << str << "' into " << output->type->ToString(); - ctx->SetStatus(Status(StatusCode::Invalid, ss.str())); + ctx->SetStatus(Status::Invalid("Failed to cast String '", str, "' into ", + output->type->ToString())); return; } } @@ -962,10 +988,9 @@ struct CastFunctortype->ToString(); - ctx->SetStatus(Status(StatusCode::Invalid, ss.str())); + ctx->SetStatus(Status::Invalid("Failed to cast String '", + input_array.GetString(i), "' into ", + output->type->ToString())); return; } @@ -980,6 +1005,80 @@ struct CastFunctor +struct CastFunctor { + void operator()(FunctionContext* ctx, const CastOptions& options, + const ArrayData& input, ArrayData* output) { + using out_type = TimestampType::c_type; + + StringArray input_array(input.Copy()); + auto out_data = 
output->GetMutableValues(1); + internal::StringConverter converter(output->type); + + for (int64_t i = 0; i < input.length; ++i, ++out_data) { + if (input_array.IsNull(i)) { + continue; + } + + const auto str = input_array.GetView(i); + if (!converter(str.data(), str.length(), out_data)) { + ctx->SetStatus(Status::Invalid("Failed to cast String '", str, "' into ", + output->type->ToString())); + return; + } + } + } +}; + +// ---------------------------------------------------------------------- +// Binary to String +// + +template +struct CastFunctor< + StringType, I, + typename std::enable_if::value>::type> { + void operator()(FunctionContext* ctx, const CastOptions& options, + const ArrayData& input, ArrayData* output) { + BinaryArray binary(input.Copy()); + + if (options.allow_invalid_utf8) { + ZeroCopyData(input, output); + return; + } + + util::InitializeUTF8(); + + if (binary.null_count() != 0) { + for (int64_t i = 0; i < input.length; i++) { + if (binary.IsNull(i)) { + continue; + } + + const auto str = binary.GetView(i); + if (ARROW_PREDICT_FALSE(!arrow::util::ValidateUTF8(str))) { + ctx->SetStatus(Status::Invalid("Invalid UTF8 payload")); + return; + } + } + + } else { + for (int64_t i = 0; i < input.length; i++) { + const auto str = binary.GetView(i); + if (ARROW_PREDICT_FALSE(!arrow::util::ValidateUTF8(str))) { + ctx->SetStatus(Status::Invalid("Invalid UTF8 payload")); + return; + } + } + } + + ZeroCopyData(input, output); + } +}; + // ---------------------------------------------------------------------- typedef std::functiontype->ToString(); - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("Cannot pre-allocate memory for type: ", + out->type->ToString()); } if (type_id != Type::NA) { @@ -1048,6 +1146,17 @@ static Status AllocateIfNotPreallocated(FunctionContext* ctx, const ArrayData& i return Status::OK(); } +class IdentityCast : public UnaryKernel { + public: + IdentityCast() {} + + Status Call(FunctionContext* ctx, const Datum& input, Datum* out) override { + DCHECK_EQ(input.kind(), Datum::ARRAY); + out->value = input.array()->Copy(); + return Status::OK(); + } +}; + class CastKernel : public UnaryKernel { public: CastKernel(const CastOptions& options, const CastFunction& func, bool is_zero_copy, @@ -1059,17 +1168,22 @@ class CastKernel : public UnaryKernel { out_type_(out_type) {} Status Call(FunctionContext* ctx, const Datum& input, Datum* out) override { - DCHECK_EQ(Datum::ARRAY, input.kind()); + if (input.kind() != Datum::ARRAY) + return Status::NotImplemented("CastKernel only supports Datum::ARRAY input"); const ArrayData& in_data = *input.array(); - ArrayData* result; - if (out->kind() == Datum::NONE) { - out->value = ArrayData::Make(out_type_, in_data.length); + switch (out->kind()) { + case Datum::NONE: + out->value = ArrayData::Make(out_type_, in_data.length); + break; + case Datum::ARRAY: + break; + default: + return Status::NotImplemented("CastKernel only supports Datum::ARRAY output"); } - result = out->array().get(); - + ArrayData* result = out->array().get(); if (!is_zero_copy_) { RETURN_NOT_OK( AllocateIfNotPreallocated(ctx, in_data, can_pre_allocate_values_, result)); @@ -1088,6 +1202,8 @@ class CastKernel : public UnaryKernel { std::shared_ptr out_type_; }; +// TODO(wesm): ARROW-4110 Do not generate cases that could return IdentityCast + #define CAST_CASE(InType, OutType) \ case OutType::type_id: \ is_zero_copy = is_zero_copy_cast::value; \ @@ -1133,12 +1249,10 @@ class CastKernel : public UnaryKernel { FN(Int64Type, Date64Type); 
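Before the remaining cast tables, a minimal caller-side sketch of the Binary-to-String path added above: with the default (safe) options every value is validated as UTF8 before the buffers are zero-copied, while setting allow_invalid_utf8 skips validation and takes the ZeroCopyData fast path directly. This assumes the public arrow::compute::Cast entry point; binary_array stands in for any pre-built BinaryArray.

    arrow::compute::FunctionContext ctx(arrow::default_memory_pool());
    arrow::compute::CastOptions options;   // safe defaults: validate UTF8
    // options.allow_invalid_utf8 = true;  // would skip validation entirely
    std::shared_ptr<arrow::Array> out;
    RETURN_NOT_OK(arrow::compute::Cast(&ctx, *binary_array, arrow::utf8(),
                                       options, &out));
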
#define DATE32_CASES(FN, IN_TYPE) \ - FN(Date32Type, Date32Type); \ FN(Date32Type, Date64Type); \ FN(Date32Type, Int32Type); #define DATE64_CASES(FN, IN_TYPE) \ - FN(Date64Type, Date64Type); \ FN(Date64Type, Date32Type); \ FN(Date64Type, Int64Type); @@ -1158,8 +1272,9 @@ class CastKernel : public UnaryKernel { FN(TimestampType, Date64Type); \ FN(TimestampType, Int64Type); +#define BINARY_CASES(FN, IN_TYPE) FN(BinaryType, StringType); + #define STRING_CASES(FN, IN_TYPE) \ - FN(StringType, StringType); \ FN(StringType, BooleanType); \ FN(StringType, UInt8Type); \ FN(StringType, Int8Type); \ @@ -1170,7 +1285,8 @@ class CastKernel : public UnaryKernel { FN(StringType, UInt64Type); \ FN(StringType, Int64Type); \ FN(StringType, FloatType); \ - FN(StringType, DoubleType); + FN(StringType, DoubleType); \ + FN(StringType, TimestampType); #define DICTIONARY_CASES(FN, IN_TYPE) \ FN(IN_TYPE, NullType); \ @@ -1212,25 +1328,26 @@ class CastKernel : public UnaryKernel { return nullptr; \ } -GET_CAST_FUNCTION(NULL_CASES, NullType); -GET_CAST_FUNCTION(NUMERIC_CASES, BooleanType); -GET_CAST_FUNCTION(NUMERIC_CASES, UInt8Type); -GET_CAST_FUNCTION(NUMERIC_CASES, Int8Type); -GET_CAST_FUNCTION(NUMERIC_CASES, UInt16Type); -GET_CAST_FUNCTION(NUMERIC_CASES, Int16Type); -GET_CAST_FUNCTION(NUMERIC_CASES, UInt32Type); -GET_CAST_FUNCTION(INT32_CASES, Int32Type); -GET_CAST_FUNCTION(NUMERIC_CASES, UInt64Type); -GET_CAST_FUNCTION(INT64_CASES, Int64Type); -GET_CAST_FUNCTION(NUMERIC_CASES, FloatType); -GET_CAST_FUNCTION(NUMERIC_CASES, DoubleType); -GET_CAST_FUNCTION(DATE32_CASES, Date32Type); -GET_CAST_FUNCTION(DATE64_CASES, Date64Type); -GET_CAST_FUNCTION(TIME32_CASES, Time32Type); -GET_CAST_FUNCTION(TIME64_CASES, Time64Type); -GET_CAST_FUNCTION(TIMESTAMP_CASES, TimestampType); -GET_CAST_FUNCTION(STRING_CASES, StringType); -GET_CAST_FUNCTION(DICTIONARY_CASES, DictionaryType); +GET_CAST_FUNCTION(NULL_CASES, NullType) +GET_CAST_FUNCTION(NUMERIC_CASES, BooleanType) +GET_CAST_FUNCTION(NUMERIC_CASES, UInt8Type) +GET_CAST_FUNCTION(NUMERIC_CASES, Int8Type) +GET_CAST_FUNCTION(NUMERIC_CASES, UInt16Type) +GET_CAST_FUNCTION(NUMERIC_CASES, Int16Type) +GET_CAST_FUNCTION(NUMERIC_CASES, UInt32Type) +GET_CAST_FUNCTION(INT32_CASES, Int32Type) +GET_CAST_FUNCTION(NUMERIC_CASES, UInt64Type) +GET_CAST_FUNCTION(INT64_CASES, Int64Type) +GET_CAST_FUNCTION(NUMERIC_CASES, FloatType) +GET_CAST_FUNCTION(NUMERIC_CASES, DoubleType) +GET_CAST_FUNCTION(DATE32_CASES, Date32Type) +GET_CAST_FUNCTION(DATE64_CASES, Date64Type) +GET_CAST_FUNCTION(TIME32_CASES, Time32Type) +GET_CAST_FUNCTION(TIME64_CASES, Time64Type) +GET_CAST_FUNCTION(TIMESTAMP_CASES, TimestampType) +GET_CAST_FUNCTION(BINARY_CASES, BinaryType) +GET_CAST_FUNCTION(STRING_CASES, StringType) +GET_CAST_FUNCTION(DICTIONARY_CASES, DictionaryType) #define CAST_FUNCTION_CASE(InType) \ case InType::type_id: \ @@ -1259,6 +1376,11 @@ Status GetListCastFunc(const DataType& in_type, const std::shared_ptr& Status GetCastFunction(const DataType& in_type, const std::shared_ptr& out_type, const CastOptions& options, std::unique_ptr* kernel) { + if (in_type.Equals(out_type)) { + *kernel = std::unique_ptr(new IdentityCast); + return Status::OK(); + } + switch (in_type.id()) { CAST_FUNCTION_CASE(NullType); CAST_FUNCTION_CASE(BooleanType); @@ -1277,6 +1399,7 @@ Status GetCastFunction(const DataType& in_type, const std::shared_ptr& CAST_FUNCTION_CASE(Time32Type); CAST_FUNCTION_CASE(Time64Type); CAST_FUNCTION_CASE(TimestampType); + CAST_FUNCTION_CASE(BinaryType); CAST_FUNCTION_CASE(StringType); 
CAST_FUNCTION_CASE(DictionaryType); case Type::LIST: @@ -1286,10 +1409,8 @@ Status GetCastFunction(const DataType& in_type, const std::shared_ptr& break; } if (*kernel == nullptr) { - std::stringstream ss; - ss << "No cast implemented from " << in_type.ToString() << " to " - << out_type->ToString(); - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("No cast implemented from ", in_type.ToString(), " to ", + out_type->ToString()); } return Status::OK(); } diff --git a/cpp/src/arrow/compute/kernels/cast.h b/cpp/src/arrow/compute/kernels/cast.h index 65c70bf14aa88..8c42f07bda7f1 100644 --- a/cpp/src/arrow/compute/kernels/cast.h +++ b/cpp/src/arrow/compute/kernels/cast.h @@ -38,12 +38,14 @@ struct ARROW_EXPORT CastOptions { CastOptions() : allow_int_overflow(false), allow_time_truncate(false), - allow_float_truncate(false) {} + allow_float_truncate(false), + allow_invalid_utf8(false) {} explicit CastOptions(bool safe) : allow_int_overflow(!safe), allow_time_truncate(!safe), - allow_float_truncate(!safe) {} + allow_float_truncate(!safe), + allow_invalid_utf8(!safe) {} static CastOptions Safe() { return CastOptions(true); } @@ -52,6 +54,9 @@ struct ARROW_EXPORT CastOptions { bool allow_int_overflow; bool allow_time_truncate; bool allow_float_truncate; + // Indicate if conversions from Binary/FixedSizeBinary to string must + // validate the utf8 payload. + bool allow_invalid_utf8; }; /// \since 0.7.0 diff --git a/cpp/src/arrow/compute/kernels/hash-test.cc b/cpp/src/arrow/compute/kernels/hash-test.cc new file mode 100644 index 0000000000000..f20575f621b4c --- /dev/null +++ b/cpp/src/arrow/compute/kernels/hash-test.cc @@ -0,0 +1,344 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
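The new hash-test.cc below exercises the Unique and DictionaryEncode kernels over primitive, boolean, temporal, binary, fixed-size binary, decimal, and chunked inputs. For orientation, a hedged sketch of the two entry points under test, assuming the declarations in arrow/compute/kernels/hash.h and an array-valued input:

    arrow::compute::FunctionContext ctx(arrow::default_memory_pool());

    // Distinct values, in order of first appearance:
    std::shared_ptr<arrow::Array> uniques;
    RETURN_NOT_OK(arrow::compute::Unique(&ctx, input, &uniques));

    // Same hash table, but also emits int32 indices into the dictionary:
    arrow::compute::Datum encoded;
    RETURN_NOT_OK(arrow::compute::DictionaryEncode(&ctx, input, &encoded));
    // (array input assumed; a chunked input yields a CHUNKED_ARRAY datum)
    std::shared_ptr<arrow::Array> dict = arrow::MakeArray(encoded.array());
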
+ +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "arrow/array.h" +#include "arrow/buffer.h" +#include "arrow/memory_pool.h" +#include "arrow/status.h" +#include "arrow/table.h" +#include "arrow/test-common.h" +#include "arrow/test-util.h" +#include "arrow/type.h" +#include "arrow/type_traits.h" +#include "arrow/util/decimal.h" + +#include "arrow/compute/context.h" +#include "arrow/compute/kernel.h" +#include "arrow/compute/kernels/hash.h" +#include "arrow/compute/kernels/util-internal.h" +#include "arrow/compute/test-util.h" + +using std::shared_ptr; +using std::vector; + +namespace arrow { +namespace compute { + +// ---------------------------------------------------------------------- +// Dictionary tests + +template +void CheckUnique(FunctionContext* ctx, const shared_ptr& type, + const vector& in_values, const vector& in_is_valid, + const vector& out_values, const vector& out_is_valid) { + shared_ptr input = _MakeArray(type, in_values, in_is_valid); + shared_ptr expected = _MakeArray(type, out_values, out_is_valid); + + shared_ptr result; + ASSERT_OK(Unique(ctx, input, &result)); + ASSERT_ARRAYS_EQUAL(*expected, *result); +} + +template +void CheckDictEncode(FunctionContext* ctx, const shared_ptr& type, + const vector& in_values, const vector& in_is_valid, + const vector& out_values, const vector& out_is_valid, + const vector& out_indices) { + shared_ptr input = _MakeArray(type, in_values, in_is_valid); + shared_ptr ex_dict = _MakeArray(type, out_values, out_is_valid); + shared_ptr ex_indices = + _MakeArray(int32(), out_indices, in_is_valid); + + DictionaryArray expected(dictionary(int32(), ex_dict), ex_indices); + + Datum datum_out; + ASSERT_OK(DictionaryEncode(ctx, input, &datum_out)); + shared_ptr result = MakeArray(datum_out.array()); + + ASSERT_ARRAYS_EQUAL(expected, *result); +} + +class TestHashKernel : public ComputeFixture, public TestBase {}; + +template +class TestHashKernelPrimitive : public ComputeFixture, public TestBase {}; + +typedef ::testing::Types + PrimitiveDictionaries; + +TYPED_TEST_CASE(TestHashKernelPrimitive, PrimitiveDictionaries); + +TYPED_TEST(TestHashKernelPrimitive, Unique) { + using T = typename TypeParam::c_type; + auto type = TypeTraits::type_singleton(); + CheckUnique(&this->ctx_, type, {2, 1, 2, 1}, {true, false, true, true}, + {2, 1}, {}); + CheckUnique(&this->ctx_, type, {2, 1, 3, 1}, {false, false, true, true}, + {3, 1}, {}); +} + +TYPED_TEST(TestHashKernelPrimitive, DictEncode) { + using T = typename TypeParam::c_type; + auto type = TypeTraits::type_singleton(); + CheckDictEncode(&this->ctx_, type, {2, 1, 2, 1, 2, 3}, + {true, false, true, true, true, true}, {2, 1, 3}, {}, + {0, 0, 0, 1, 0, 2}); +} + +TYPED_TEST(TestHashKernelPrimitive, PrimitiveResizeTable) { + using T = typename TypeParam::c_type; + // Skip this test for (u)int8 + if (sizeof(Scalar) == 1) { + return; + } + + const int64_t kTotalValues = 1000000; + const int64_t kRepeats = 5; + + vector values; + vector uniques; + vector indices; + for (int64_t i = 0; i < kTotalValues * kRepeats; i++) { + const auto val = static_cast(i % kTotalValues); + values.push_back(val); + + if (i < kTotalValues) { + uniques.push_back(val); + } + indices.push_back(static_cast(i % kTotalValues)); + } + + auto type = TypeTraits::type_singleton(); + CheckUnique(&this->ctx_, type, values, {}, uniques, {}); + + CheckDictEncode(&this->ctx_, type, values, {}, uniques, {}, indices); +} + +TEST_F(TestHashKernel, UniqueTimeTimestamp) { + 
CheckUnique(&this->ctx_, time32(TimeUnit::SECOND), {2, 1, 2, 1}, + {true, false, true, true}, {2, 1}, {}); + + CheckUnique(&this->ctx_, time64(TimeUnit::NANO), {2, 1, 2, 1}, + {true, false, true, true}, {2, 1}, {}); + + CheckUnique(&this->ctx_, timestamp(TimeUnit::NANO), + {2, 1, 2, 1}, {true, false, true, true}, {2, 1}, + {}); +} + +TEST_F(TestHashKernel, UniqueBoolean) { + CheckUnique(&this->ctx_, boolean(), {true, true, false, true}, + {true, false, true, true}, {true, false}, {}); + + CheckUnique(&this->ctx_, boolean(), {false, true, false, true}, + {true, false, true, true}, {false, true}, {}); + + // No nulls + CheckUnique(&this->ctx_, boolean(), {true, true, false, true}, {}, + {true, false}, {}); + + CheckUnique(&this->ctx_, boolean(), {false, true, false, true}, {}, + {false, true}, {}); +} + +TEST_F(TestHashKernel, DictEncodeBoolean) { + CheckDictEncode( + &this->ctx_, boolean(), {true, true, false, true, false}, + {true, false, true, true, true}, {true, false}, {}, {0, 0, 1, 0, 1}); + + CheckDictEncode( + &this->ctx_, boolean(), {false, true, false, true, false}, + {true, false, true, true, true}, {false, true}, {}, {0, 0, 0, 1, 0}); + + // No nulls + CheckDictEncode(&this->ctx_, boolean(), + {true, true, false, true, false}, {}, {true, false}, + {}, {0, 0, 1, 0, 1}); + + CheckDictEncode(&this->ctx_, boolean(), + {false, true, false, true, false}, {}, {false, true}, + {}, {0, 1, 0, 1, 0}); +} + +TEST_F(TestHashKernel, UniqueBinary) { + CheckUnique(&this->ctx_, binary(), + {"test", "", "test2", "test"}, + {true, false, true, true}, {"test", "test2"}, {}); + + CheckUnique(&this->ctx_, utf8(), {"test", "", "test2", "test"}, + {true, false, true, true}, {"test", "test2"}, {}); +} + +TEST_F(TestHashKernel, DictEncodeBinary) { + CheckDictEncode( + &this->ctx_, binary(), {"test", "", "test2", "test", "baz"}, + {true, false, true, true, true}, {"test", "test2", "baz"}, {}, {0, 0, 1, 0, 2}); + + CheckDictEncode( + &this->ctx_, utf8(), {"test", "", "test2", "test", "baz"}, + {true, false, true, true, true}, {"test", "test2", "baz"}, {}, {0, 0, 1, 0, 2}); +} + +TEST_F(TestHashKernel, BinaryResizeTable) { + const int32_t kTotalValues = 10000; +#if !defined(ARROW_VALGRIND) + const int32_t kRepeats = 10; +#else + // Mitigate Valgrind's slowness + const int32_t kRepeats = 3; +#endif + + vector values; + vector uniques; + vector indices; + char buf[20] = "test"; + + for (int32_t i = 0; i < kTotalValues * kRepeats; i++) { + int32_t index = i % kTotalValues; + + ASSERT_GE(snprintf(buf + 4, sizeof(buf) - 4, "%d", index), 0); + values.emplace_back(buf); + + if (i < kTotalValues) { + uniques.push_back(values.back()); + } + indices.push_back(index); + } + + CheckUnique(&this->ctx_, binary(), values, {}, uniques, {}); + CheckDictEncode(&this->ctx_, binary(), values, {}, uniques, {}, + indices); + + CheckUnique(&this->ctx_, utf8(), values, {}, uniques, {}); + CheckDictEncode(&this->ctx_, utf8(), values, {}, uniques, {}, + indices); +} + +TEST_F(TestHashKernel, UniqueFixedSizeBinary) { + CheckUnique( + &this->ctx_, fixed_size_binary(5), {"aaaaa", "", "bbbbb", "aaaaa"}, + {true, false, true, true}, {"aaaaa", "bbbbb"}, {}); +} + +TEST_F(TestHashKernel, DictEncodeFixedSizeBinary) { + CheckDictEncode( + &this->ctx_, fixed_size_binary(5), {"bbbbb", "", "bbbbb", "aaaaa", "ccccc"}, + {true, false, true, true, true}, {"bbbbb", "aaaaa", "ccccc"}, {}, {0, 0, 0, 1, 2}); +} + +TEST_F(TestHashKernel, FixedSizeBinaryResizeTable) { + const int32_t kTotalValues = 10000; +#if !defined(ARROW_VALGRIND) + const int32_t 
kRepeats = 10; +#else + // Mitigate Valgrind's slowness + const int32_t kRepeats = 3; +#endif + + vector values; + vector uniques; + vector indices; + char buf[7] = "test.."; + + for (int32_t i = 0; i < kTotalValues * kRepeats; i++) { + int32_t index = i % kTotalValues; + + buf[4] = static_cast(index / 128); + buf[5] = static_cast(index % 128); + values.emplace_back(buf, 6); + + if (i < kTotalValues) { + uniques.push_back(values.back()); + } + indices.push_back(index); + } + + auto type = fixed_size_binary(6); + CheckUnique(&this->ctx_, type, values, {}, uniques, + {}); + CheckDictEncode(&this->ctx_, type, values, {}, + uniques, {}, indices); +} + +TEST_F(TestHashKernel, UniqueDecimal) { + vector values{12, 12, 11, 12}; + vector expected{12, 11}; + + CheckUnique(&this->ctx_, decimal(2, 0), values, + {true, false, true, true}, expected, {}); +} + +TEST_F(TestHashKernel, DictEncodeDecimal) { + vector values{12, 12, 11, 12, 13}; + vector expected{12, 11, 13}; + + CheckDictEncode(&this->ctx_, decimal(2, 0), values, + {true, false, true, true, true}, expected, + {}, {0, 0, 1, 0, 2}); +} + +TEST_F(TestHashKernel, ChunkedArrayInvoke) { + vector values1 = {"foo", "bar", "foo"}; + vector values2 = {"bar", "baz", "quuux", "foo"}; + + auto type = utf8(); + auto a1 = _MakeArray(type, values1, {}); + auto a2 = _MakeArray(type, values2, {}); + + vector dict_values = {"foo", "bar", "baz", "quuux"}; + auto ex_dict = _MakeArray(type, dict_values, {}); + + ArrayVector arrays = {a1, a2}; + auto carr = std::make_shared(arrays); + + // Unique + shared_ptr result; + ASSERT_OK(Unique(&this->ctx_, carr, &result)); + ASSERT_ARRAYS_EQUAL(*ex_dict, *result); + + // Dictionary encode + auto dict_type = dictionary(int32(), ex_dict); + + auto i1 = _MakeArray(int32(), {0, 1, 0}, {}); + auto i2 = _MakeArray(int32(), {1, 2, 3, 0}, {}); + + ArrayVector dict_arrays = {std::make_shared(dict_type, i1), + std::make_shared(dict_type, i2)}; + auto dict_carr = std::make_shared(dict_arrays); + + Datum encoded_out; + ASSERT_OK(DictionaryEncode(&this->ctx_, carr, &encoded_out)); + ASSERT_EQ(Datum::CHUNKED_ARRAY, encoded_out.kind()); + + AssertChunkedEqual(*dict_carr, *encoded_out.chunked_array()); +} + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/hash.cc b/cpp/src/arrow/compute/kernels/hash.cc index c057ea5736139..0513fe1f6ad4f 100644 --- a/cpp/src/arrow/compute/kernels/hash.cc +++ b/cpp/src/arrow/compute/kernels/hash.cc @@ -56,11 +56,9 @@ namespace compute { namespace { -#define CHECK_IMPLEMENTED(KERNEL, FUNCNAME, TYPE) \ - if (!KERNEL) { \ - std::stringstream ss; \ - ss << FUNCNAME << " not implemented for " << type->ToString(); \ - return Status::NotImplemented(ss.str()); \ +#define CHECK_IMPLEMENTED(KERNEL, FUNCNAME, TYPE) \ + if (!KERNEL) { \ + return Status::NotImplemented(FUNCNAME, " not implemented for ", type->ToString()); \ } // ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/compute/kernels/util-internal.cc b/cpp/src/arrow/compute/kernels/util-internal.cc index 81fafcba6167a..745b30c3d26a9 100644 --- a/cpp/src/arrow/compute/kernels/util-internal.cc +++ b/cpp/src/arrow/compute/kernels/util-internal.cc @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "arrow/array.h" @@ -27,6 +28,7 @@ #include "arrow/table.h" #include "arrow/util/logging.h" +#include "arrow/compute/context.h" #include "arrow/compute/kernel.h" namespace arrow { @@ -162,6 +164,47 @@ Datum WrapDatumsLike(const Datum& value, const std::vector& 
datums) { } } +PrimitiveAllocatingUnaryKernel::PrimitiveAllocatingUnaryKernel( + std::unique_ptr delegate) + : delegate_(std::move(delegate)) {} + +inline void ZeroLastByte(Buffer* buffer) { + *(buffer->mutable_data() + (buffer->size() - 1)) = 0; +} + +Status PrimitiveAllocatingUnaryKernel::Call(FunctionContext* ctx, const Datum& input, + Datum* out) { + std::vector> data_buffers; + const ArrayData& in_data = *input.array(); + MemoryPool* pool = ctx->memory_pool(); + + // Handle the validity buffer. + if (in_data.offset == 0 || in_data.null_count <= 0) { + // Validity bitmap will be zero copied (or allocated when buffer is known). + data_buffers.emplace_back(); + } else { + std::shared_ptr buffer; + RETURN_NOT_OK(AllocateBitmap(pool, in_data.length, &buffer)); + // Per spec all trailing bits should indicate nullness, since + // the last byte might only be partially set, we ensure the + // remaining bit is set. + ZeroLastByte(buffer.get()); + buffer->ZeroPadding(); + data_buffers.push_back(buffer); + } + // Allocate the boolean value buffer. + std::shared_ptr buffer; + RETURN_NOT_OK(AllocateBitmap(pool, in_data.length, &buffer)); + // Some utility methods access the last byte before it might be + // initialized this makes valgrind/asan unhappy, so we proactively + // zero it. + ZeroLastByte(buffer.get()); + data_buffers.push_back(buffer); + out->value = ArrayData::Make(null(), in_data.length, data_buffers); + + return delegate_->Call(ctx, input, out); +} + } // namespace detail } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/util-internal.h b/cpp/src/arrow/compute/kernels/util-internal.h index 23ed4fd7ee7d7..22520235a524c 100644 --- a/cpp/src/arrow/compute/kernels/util-internal.h +++ b/cpp/src/arrow/compute/kernels/util-internal.h @@ -32,7 +32,9 @@ namespace compute { class FunctionContext; -static inline void CopyData(const ArrayData& input, ArrayData* output) { +// \brief Make a copy of the buffers into a destination array without carrying +// the type. +static inline void ZeroCopyData(const ArrayData& input, ArrayData* output) { output->length = input.length; output->null_count = input.null_count; output->buffers = input.buffers; @@ -42,6 +44,12 @@ static inline void CopyData(const ArrayData& input, ArrayData* output) { namespace detail { +/// \brief Invoke the kernel on value using the ctx and store results in outputs. +/// +/// \param[in,out] ctx The function context to use when invoking the kernel. +/// \param[in,out] kernel The kernel to execute. +/// \param[in] value The input value to execute the kernel with. +/// \param[out] outputs One ArrayData datum for each ArrayData available in value. ARROW_EXPORT Status InvokeUnaryArrayKernel(FunctionContext* ctx, UnaryKernel* kernel, const Datum& value, std::vector* outputs); @@ -61,6 +69,26 @@ Datum WrapArraysLike(const Datum& value, ARROW_EXPORT Datum WrapDatumsLike(const Datum& value, const std::vector& datums); +/// \brief Kernel used to preallocate outputs for primitive types. +class PrimitiveAllocatingUnaryKernel : public UnaryKernel { + public: + explicit PrimitiveAllocatingUnaryKernel(std::unique_ptr delegate); + /// \brief Sets out to be of type ArrayData with the necessary + /// data buffers prepopulated. + /// + /// This method does not populate types on arrays and sets type to null. + /// + /// The current implementation only supports primitive boolean outputs and + /// assumes validity bitmaps that are not sliced will be zero copied (i.e. + /// no allocation happens for them). 
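+ /// Any bitmap allocated here has its trailing byte zeroed up front,
+ /// since some utility methods read the partially-set last byte before
+ /// it is initialized, which valgrind/ASAN would otherwise flag.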
+ /// + /// TODO(ARROW-1896): Make this generic enough to support casts. + Status Call(FunctionContext* ctx, const Datum& input, Datum* out) override; + + private: + std::unique_ptr delegate_; +}; + } // namespace detail } // namespace compute diff --git a/cpp/src/arrow/compute/test-util.h b/cpp/src/arrow/compute/test-util.h new file mode 100644 index 0000000000000..b406a710b45bc --- /dev/null +++ b/cpp/src/arrow/compute/test-util.h @@ -0,0 +1,70 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef ARROW_COMPUTE_TEST_UTIL_H +#define ARROW_COMPUTE_TEST_UTIL_H + +#include +#include + +#include + +#include "arrow/array.h" +#include "arrow/memory_pool.h" +#include "arrow/type.h" + +#include "arrow/compute/context.h" +#include "arrow/compute/kernel.h" + +namespace arrow { +namespace compute { + +class ComputeFixture { + public: + ComputeFixture() : ctx_(default_memory_pool()) {} + + protected: + FunctionContext ctx_; +}; + +class MockUnaryKernel : public UnaryKernel { + public: + MOCK_METHOD3(Call, Status(FunctionContext* ctx, const Datum& input, Datum* out)); +}; + +class MockBinaryKernel : public BinaryKernel { + MOCK_METHOD4(Call, Status(FunctionContext* ctx, const Datum& left, const Datum& right, + Datum* out)); +}; + +template +std::shared_ptr _MakeArray(const std::shared_ptr& type, + const std::vector& values, + const std::vector& is_valid) { + std::shared_ptr result; + if (is_valid.size() > 0) { + ArrayFromVector(type, is_valid, values, &result); + } else { + ArrayFromVector(type, values, &result); + } + return result; +} + +} // namespace compute +} // namespace arrow + +#endif diff --git a/cpp/src/arrow/csv/CMakeLists.txt b/cpp/src/arrow/csv/CMakeLists.txt index 84b080b1eef09..2a72dceadad16 100644 --- a/cpp/src/arrow/csv/CMakeLists.txt +++ b/cpp/src/arrow/csv/CMakeLists.txt @@ -15,17 +15,18 @@ # specific language governing permissions and limitations # under the License. 
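Before the CSV build changes, a step back to the PrimitiveAllocatingUnaryKernel introduced above: it front-loads the two bitmap allocations (validity and boolean values) so the delegate kernel only fills in bits. A sketch of wrapping a delegate, where MakeIsValidKernel is purely hypothetical and only the wrapper itself comes from this patch:

    // Preallocate output bitmaps before the delegate's Call() runs.
    std::unique_ptr<arrow::compute::UnaryKernel> delegate = MakeIsValidKernel();
    arrow::compute::detail::PrimitiveAllocatingUnaryKernel kernel(
        std::move(delegate));

    arrow::compute::Datum out;
    RETURN_NOT_OK(kernel.Call(&ctx, input, &out));
    // out now holds an ArrayData whose validity bitmap is zero-copied (or
    // freshly allocated when the input is sliced) plus a zero-padded value
    // bitmap; its type is left as null() for the delegate to fix up.
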
-ADD_ARROW_TEST(csv-chunker-test) -ADD_ARROW_TEST(csv-column-builder-test) -ADD_ARROW_TEST(csv-converter-test) -ADD_ARROW_TEST(csv-parser-test) +ADD_ARROW_TEST(chunker-test + PREFIX "arrow-csv") +ADD_ARROW_TEST(column-builder-test + PREFIX "arrow-csv") +ADD_ARROW_TEST(converter-test + PREFIX "arrow-csv") +ADD_ARROW_TEST(parser-test + PREFIX "arrow-csv") -ADD_ARROW_BENCHMARK(csv-converter-benchmark) -ADD_ARROW_BENCHMARK(csv-parser-benchmark) +ADD_ARROW_BENCHMARK(converter-benchmark + PREFIX "arrow-csv") +ADD_ARROW_BENCHMARK(parser-benchmark + PREFIX "arrow-csv") -# Headers: top level -file(GLOB_RECURSE ARROW_CSV_HEADERS "*.h") - -install(FILES - ${ARROW_CSV_HEADERS} - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow/csv") +ARROW_INSTALL_ALL_HEADERS("arrow/csv") diff --git a/cpp/src/arrow/csv/csv-chunker-test.cc b/cpp/src/arrow/csv/chunker-test.cc similarity index 100% rename from cpp/src/arrow/csv/csv-chunker-test.cc rename to cpp/src/arrow/csv/chunker-test.cc diff --git a/cpp/src/arrow/csv/csv-column-builder-test.cc b/cpp/src/arrow/csv/column-builder-test.cc similarity index 100% rename from cpp/src/arrow/csv/csv-column-builder-test.cc rename to cpp/src/arrow/csv/column-builder-test.cc diff --git a/cpp/src/arrow/csv/column-builder.cc b/cpp/src/arrow/csv/column-builder.cc index 28cbad47580e8..1f37046798fd7 100644 --- a/cpp/src/arrow/csv/column-builder.cc +++ b/cpp/src/arrow/csv/column-builder.cc @@ -305,12 +305,12 @@ Status InferringColumnBuilder::TryConvertChunk(size_t chunk_index) { void InferringColumnBuilder::Insert(int64_t block_index, const std::shared_ptr& parser) { - DCHECK_NE(converter_, nullptr); - // Create a slot for the new chunk and spawn a task to convert it size_t chunk_index = static_cast(block_index); { std::lock_guard lock(mutex_); + + DCHECK_NE(converter_, nullptr); if (chunks_.size() <= chunk_index) { chunks_.resize(chunk_index + 1); } diff --git a/cpp/src/arrow/csv/column-builder.h b/cpp/src/arrow/csv/column-builder.h index b21cff76be5c6..054a642295cb5 100644 --- a/cpp/src/arrow/csv/column-builder.h +++ b/cpp/src/arrow/csv/column-builder.h @@ -18,22 +18,29 @@ #ifndef ARROW_CSV_COLUMN_BUILDER_H #define ARROW_CSV_COLUMN_BUILDER_H +#include #include -#include #include "arrow/array.h" -#include "arrow/csv/converter.h" -#include "arrow/csv/options.h" -#include "arrow/memory_pool.h" #include "arrow/status.h" -#include "arrow/table.h" -#include "arrow/type.h" -#include "arrow/util/task-group.h" #include "arrow/util/visibility.h" namespace arrow { + +class ChunkedArray; +class DataType; + +namespace internal { + +class TaskGroup; + +} // namespace internal + namespace csv { +class BlockParser; +struct ConvertOptions; + class ARROW_EXPORT ColumnBuilder { public: virtual ~ColumnBuilder() = default; diff --git a/cpp/src/arrow/csv/csv-converter-benchmark.cc b/cpp/src/arrow/csv/converter-benchmark.cc similarity index 100% rename from cpp/src/arrow/csv/csv-converter-benchmark.cc rename to cpp/src/arrow/csv/converter-benchmark.cc diff --git a/cpp/src/arrow/csv/csv-converter-test.cc b/cpp/src/arrow/csv/converter-test.cc similarity index 82% rename from cpp/src/arrow/csv/csv-converter-test.cc rename to cpp/src/arrow/csv/converter-test.cc index 2534541d3154a..ea12c0b66a94b 100644 --- a/cpp/src/arrow/csv/csv-converter-test.cc +++ b/cpp/src/arrow/csv/converter-test.cc @@ -176,13 +176,30 @@ TEST(IntegerConversion, Basics) { } TEST(IntegerConversion, Nulls) { - AssertConversion(int8(), {"12,34\n", ",-128\n"}, - {{12, 0}, {34, -128}}, - {{true, false}, {true, true}}); + 
AssertConversion(int8(), {"12,N/A\n", ",-128\n"}, + {{12, 0}, {0, -128}}, + {{true, false}, {false, true}}); AssertConversionAllNulls(int8()); } +TEST(IntegerConversion, CustomNulls) { + auto options = ConvertOptions::Defaults(); + options.null_values = {"xxx", "zzz"}; + + AssertConversion(int8(), {"12,xxx\n", "zzz,-128\n"}, + {{12, 0}, {0, -128}}, {{true, false}, {false, true}}, + options); + + AssertConversionError(int8(), {",xxx,N/A\n"}, {0, 2}, options); + + // Duplicate nulls allowed + options.null_values = {"xxx", "zzz", "xxx"}; + AssertConversion(int8(), {"12,xxx\n", "zzz,-128\n"}, + {{12, 0}, {0, -128}}, {{true, false}, {false, true}}, + options); +} + TEST(IntegerConversion, Whitespace) { AssertConversion(int32(), {" 12,34 \n", " 56 ,78\n"}, {{12, 56}, {34, 78}}); @@ -203,6 +220,15 @@ TEST(FloatingPointConversion, Nulls) { AssertConversionAllNulls(float64()); } +TEST(FloatingPointConversion, CustomNulls) { + auto options = ConvertOptions::Defaults(); + options.null_values = {"xxx", "zzz"}; + + AssertConversion(float32(), {"1.5,xxx\n", "zzz,-1e10\n"}, + {{1.5, 0.}, {0., -1e10f}}, + {{true, false}, {false, true}}, options); +} + TEST(FloatingPointConversion, Whitespace) { AssertConversion(float64(), {" 12,34.5\n", " 0 ,-1e100 \n"}, {{12., 0.}, {34.5, -1e100}}); @@ -220,6 +246,15 @@ TEST(BooleanConversion, Nulls) { {{true, true}, {false, true}}); } +TEST(BooleanConversion, CustomNulls) { + auto options = ConvertOptions::Defaults(); + options.null_values = {"xxx", "zzz"}; + + AssertConversion(boolean(), {"true,xxx\n", "zzz,0\n"}, + {{true, false}, {false, false}}, + {{true, false}, {false, true}}, options); +} + TEST(TimestampConversion, Basics) { auto type = timestamp(TimeUnit::SECOND); @@ -243,6 +278,16 @@ TEST(TimestampConversion, Nulls) { {{true}, {false}, {false}}); } +TEST(TimestampConversion, CustomNulls) { + auto options = ConvertOptions::Defaults(); + options.null_values = {"xxx", "zzz"}; + + auto type = timestamp(TimeUnit::MILLI); + AssertConversion(type, {"1970-01-01 00:01:00,xxx,zzz\n"}, + {{60000}, {0}, {0}}, + {{true}, {false}, {false}}, options); +} + TEST(DecimalConversion, NotImplemented) { std::shared_ptr converter; ASSERT_RAISES(NotImplemented, diff --git a/cpp/src/arrow/csv/converter.cc b/cpp/src/arrow/csv/converter.cc index 7d8bff870ba84..22be7d6e58f3b 100644 --- a/cpp/src/arrow/csv/converter.cc +++ b/cpp/src/arrow/csv/converter.cc @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include "arrow/builder.h" #include "arrow/csv/parser.h" @@ -28,21 +30,23 @@ #include "arrow/type.h" #include "arrow/type_traits.h" #include "arrow/util/parsing.h" // IWYU pragma: keep +#include "arrow/util/trie.h" #include "arrow/util/utf8.h" namespace arrow { namespace csv { using internal::StringConverter; +using internal::Trie; +using internal::TrieBuilder; namespace { Status GenericConversionError(const std::shared_ptr& type, const uint8_t* data, uint32_t size) { - std::stringstream ss; - ss << "CSV conversion error to " << type->ToString() << ": invalid value '" - << std::string(reinterpret_cast(data), size) << "'"; - return Status::Invalid(ss.str()); + return Status::Invalid("CSV conversion error to ", type->ToString(), + ": invalid value '", + std::string(reinterpret_cast(data), size), "'"); } inline bool IsWhitespace(uint8_t c) { @@ -57,115 +61,28 @@ class ConcreteConverter : public Converter { using Converter::Converter; protected: - Status Initialize() override { return Status::OK(); } + Status Initialize() override; inline bool IsNull(const uint8_t* data, 
uint32_t size, bool quoted); + + Trie null_trie_; }; -// Recognize various spellings of null values. The list of possible spellings -// is taken from Pandas read_csv() documentation. +Status ConcreteConverter::Initialize() { + // TODO no need to build a separate Trie for each Converter instance + TrieBuilder builder; + for (const auto& s : options_.null_values) { + RETURN_NOT_OK(builder.Append(s, true /* allow_duplicates */)); + } + null_trie_ = builder.Finish(); + return Status::OK(); +} + bool ConcreteConverter::IsNull(const uint8_t* data, uint32_t size, bool quoted) { if (quoted) { return false; } - if (size == 0) { - return true; - } - // No 1-character null value exists - if (size == 1) { - return false; - } - - // XXX if the CSV parser guaranteed enough excess bytes at the end of the - // parsed area, we wouldn't need to always check size before comparing characters. - - auto chars = reinterpret_cast(data); - auto first = chars[0]; - auto second = chars[1]; - switch (first) { - case 'N': { - // "NA", "N/A", "NaN", "NULL" - if (size == 2) { - return second == 'A'; - } - auto third = chars[2]; - if (size == 3) { - return (second == '/' && third == 'A') || (second == 'a' && third == 'N'); - } - if (size == 4) { - return (second == 'U' && third == 'L' && chars[3] == 'L'); - } - return false; - } - case 'n': { - // "n/a", "nan", "null" - if (size == 2) { - return false; - } - auto third = chars[2]; - if (size == 3) { - return (second == '/' && third == 'a') || (second == 'a' && third == 'n'); - } - if (size == 4) { - return (second == 'u' && third == 'l' && chars[3] == 'l'); - } - return false; - } - case '1': { - // '1.#IND', '1.#QNAN' - if (size == 6) { - // '#' is the most unlikely char here, check it first - return (chars[2] == '#' && chars[1] == '.' && chars[3] == 'I' && - chars[4] == 'N' && chars[5] == 'D'); - } - if (size == 7) { - return (chars[2] == '#' && chars[1] == '.' && chars[3] == 'Q' && - chars[4] == 'N' && chars[5] == 'A' && chars[6] == 'N'); - } - return false; - } - case '-': { - switch (second) { - case 'N': - // "-NaN" - return (size == 4 && chars[2] == 'a' && chars[3] == 'N'); - case 'n': - // "-nan" - return (size == 4 && chars[2] == 'a' && chars[3] == 'n'); - case '1': - // "-1.#IND", "-1.#QNAN" - if (size == 7) { - return (chars[3] == '#' && chars[2] == '.' && chars[4] == 'I' && - chars[5] == 'N' && chars[6] == 'D'); - } - if (size == 8) { - return (chars[3] == '#' && chars[2] == '.' 
&& chars[4] == 'Q' && - chars[5] == 'N' && chars[6] == 'A' && chars[7] == 'N'); - } - return false; - default: - return false; - } - } - case '#': { - // "#N/A", "#N/A N/A", "#NA" - if (size < 3 || chars[1] != 'N') { - return false; - } - auto third = chars[2]; - if (size == 3) { - return third == 'A'; - } - if (size == 4) { - return third == '/' && chars[3] == 'A'; - } - if (size == 8) { - return std::memcmp(data + 2, "/A N/A", 5) == 0; - } - return false; - } - default: - return false; - } + return null_trie_.Find(util::string_view(reinterpret_cast(data), size)) >= + 0; } ///////////////////////////////////////////////////////////////////////// @@ -213,9 +130,8 @@ class VarSizeBinaryConverter : public ConcreteConverter { auto visit = [&](const uint8_t* data, uint32_t size, bool quoted) -> Status { if (CheckUTF8 && ARROW_PREDICT_FALSE(!util::ValidateUTF8(data, size))) { - std::stringstream ss; - ss << "CSV conversion error to " << type_->ToString() << ": invalid UTF8 data"; - return Status::Invalid(ss.str()); + return Status::Invalid("CSV conversion error to ", type_->ToString(), + ": invalid UTF8 data"); } builder.UnsafeAppend(data, size); return Status::OK(); @@ -255,10 +171,8 @@ Status FixedSizeBinaryConverter::Convert(const BlockParser& parser, int32_t col_ auto visit = [&](const uint8_t* data, uint32_t size, bool quoted) -> Status { if (ARROW_PREDICT_FALSE(size != byte_width)) { - std::stringstream ss; - ss << "CSV conversion error to " << type_->ToString() << ": got a " << size - << "-byte long string"; - return Status::Invalid(ss.str()); + return Status::Invalid("CSV conversion error to ", type_->ToString(), ": got a ", + size, "-byte long string"); } return builder.Append(data); }; @@ -409,9 +323,8 @@ Status Converter::Make(const std::shared_ptr& type, break; default: { - std::stringstream ss; - ss << "CSV conversion to " << type->ToString() << " is not supported"; - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("CSV conversion to ", type->ToString(), + " is not supported"); } #undef CONVERTER_CASE diff --git a/cpp/src/arrow/csv/converter.h b/cpp/src/arrow/csv/converter.h index 38ade1d21a846..d64fe695d0a26 100644 --- a/cpp/src/arrow/csv/converter.h +++ b/cpp/src/arrow/csv/converter.h @@ -57,7 +57,7 @@ class ARROW_EXPORT Converter { virtual Status Initialize() = 0; - ConvertOptions options_; + const ConvertOptions options_; MemoryPool* pool_; std::shared_ptr type_; }; diff --git a/cpp/src/arrow/csv/options.cc b/cpp/src/arrow/csv/options.cc index fccf0b67db98c..01e687b8342a3 100644 --- a/cpp/src/arrow/csv/options.cc +++ b/cpp/src/arrow/csv/options.cc @@ -22,7 +22,14 @@ namespace csv { ParseOptions ParseOptions::Defaults() { return ParseOptions(); } -ConvertOptions ConvertOptions::Defaults() { return ConvertOptions(); } +ConvertOptions ConvertOptions::Defaults() { + auto options = ConvertOptions(); + // The default list of possible null spellings is taken from Pandas' read_csv(). 
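+ // Quoted values never match these spellings; ConcreteConverter::IsNull
+ // above returns false for quoted input before consulting the trie.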
+ options.null_values = {"", "#N/A", "#N/A N/A", "#NA", "-1.#IND", "-1.#QNAN", + "-NaN", "-nan", "1.#IND", "1.#QNAN", "N/A", "NA", + "NULL", "NaN", "n/a", "nan", "null"}; + return options; +} ReadOptions ReadOptions::Defaults() { return ReadOptions(); } diff --git a/cpp/src/arrow/csv/options.h b/cpp/src/arrow/csv/options.h index 10232d45e8df4..2b4653ccdce81 100644 --- a/cpp/src/arrow/csv/options.h +++ b/cpp/src/arrow/csv/options.h @@ -22,6 +22,7 @@ #include #include #include +#include #include "arrow/util/visibility.h" @@ -66,6 +67,8 @@ struct ARROW_EXPORT ConvertOptions { bool check_utf8 = true; // Optional per-column types (disabling type inference on those columns) std::unordered_map> column_types; + // Recognized spellings for null values + std::vector null_values; static ConvertOptions Defaults(); }; diff --git a/cpp/src/arrow/csv/csv-parser-benchmark.cc b/cpp/src/arrow/csv/parser-benchmark.cc similarity index 100% rename from cpp/src/arrow/csv/csv-parser-benchmark.cc rename to cpp/src/arrow/csv/parser-benchmark.cc diff --git a/cpp/src/arrow/csv/csv-parser-test.cc b/cpp/src/arrow/csv/parser-test.cc similarity index 100% rename from cpp/src/arrow/csv/csv-parser-test.cc rename to cpp/src/arrow/csv/parser-test.cc diff --git a/cpp/src/arrow/csv/parser.cc b/cpp/src/arrow/csv/parser.cc index fe7f841f58328..b1d175adfb582 100644 --- a/cpp/src/arrow/csv/parser.cc +++ b/cpp/src/arrow/csv/parser.cc @@ -30,9 +30,7 @@ namespace arrow { namespace csv { static Status ParseError(const char* message) { - std::stringstream ss; - ss << "CSV parse error: " << message; - return Status::Invalid(ss.str()); + return Status::Invalid("CSV parse error: ", message); } static Status MismatchingColumns(int32_t expected, int32_t actual) { diff --git a/cpp/src/arrow/csv/parser.h b/cpp/src/arrow/csv/parser.h index 8a515744ee2d9..fdddc37a2c0fb 100644 --- a/cpp/src/arrow/csv/parser.h +++ b/cpp/src/arrow/csv/parser.h @@ -18,6 +18,7 @@ #ifndef ARROW_CSV_PARSER_H #define ARROW_CSV_PARSER_H +#include #include #include #include diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc index 8cf74d6b99901..efd61167b71a5 100644 --- a/cpp/src/arrow/csv/reader.cc +++ b/cpp/src/arrow/csv/reader.cc @@ -23,6 +23,8 @@ #include #include #include +#include +#include #include #include "arrow/buffer.h" @@ -353,10 +355,8 @@ class ThreadedTableReader : public BaseTableReader { chunk_size, &parsed_size)); if (parsed_size != chunk_size) { DCHECK_EQ(parsed_size, chunk_size); - std::stringstream ss; - ss << "Chunker and parser disagree on block size: " << chunk_size << " vs " - << parsed_size; - return Status::Invalid(ss.str()); + return Status::Invalid("Chunker and parser disagree on block size: ", + chunk_size, " vs ", parsed_size); } RETURN_NOT_OK(ProcessData(parser, chunk_index)); // Keep chunk buffer alive within closure and release it at the end diff --git a/cpp/src/arrow/dbi/hiveserver2/CMakeLists.txt b/cpp/src/arrow/dbi/hiveserver2/CMakeLists.txt index 3a16a7834c3c1..d2640a66b2f8f 100644 --- a/cpp/src/arrow/dbi/hiveserver2/CMakeLists.txt +++ b/cpp/src/arrow/dbi/hiveserver2/CMakeLists.txt @@ -16,17 +16,10 @@ # under the License. 
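One practical consequence of the CSV changes above, before the hiveserver2 build edits: the hand-written null-spelling matcher is replaced by a Trie built once per converter from ConvertOptions::null_values, which makes the recognized spellings user-configurable. A hedged sketch of overriding them, assuming TableReader::Make and Read match arrow/csv/reader.h and that input is an opened io::InputStream:

    auto read_options = arrow::csv::ReadOptions::Defaults();
    auto parse_options = arrow::csv::ParseOptions::Defaults();
    auto convert_options = arrow::csv::ConvertOptions::Defaults();
    // Replace the Pandas-style defaults ("", "N/A", "NaN", ...) wholesale:
    convert_options.null_values = {"xxx", "zzz"};

    std::shared_ptr<arrow::csv::TableReader> reader;
    RETURN_NOT_OK(arrow::csv::TableReader::Make(arrow::default_memory_pool(),
                                                input, read_options,
                                                parse_options, convert_options,
                                                &reader));
    std::shared_ptr<arrow::Table> table;
    RETURN_NOT_OK(reader->Read(&table));
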
add_custom_target(arrow_hiveserver2) +add_custom_target(arrow_hiveserver2-tests) # Headers: top level -install(FILES - api.h - columnar-row-set.h - operation.h - service.h - session.h - types.h - util.h - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow/dbi/hiveserver2") +ARROW_INSTALL_ALL_HEADERS("arrow/dbi/hiveserver2") set(ARROW_HIVESERVER2_SRCS columnar-row-set.cc @@ -111,11 +104,13 @@ set(ARROW_HIVESERVER2_TEST_LINK_LIBS thriftstatic) if (ARROW_BUILD_TESTS) - ADD_ARROW_TEST(hiveserver2-test + ADD_TEST_CASE(hiveserver2-test STATIC_LINK_LIBS "${ARROW_HIVESERVER2_TEST_LINK_LIBS}" - LABELS "arrow_hiveserver2" + LABELS "arrow_hiveserver2-tests" ) - set_property(TARGET hiveserver2-test - APPEND_STRING PROPERTY COMPILE_FLAGS - " -Wno-shadow-field") + if (TARGET arrow-hiveserver2-test) + set_property(TARGET arrow-hiveserver2-test + APPEND_STRING PROPERTY COMPILE_FLAGS + " -Wno-shadow-field") + endif() endif(ARROW_BUILD_TESTS) diff --git a/cpp/src/arrow/dbi/hiveserver2/hiveserver2-test.cc b/cpp/src/arrow/dbi/hiveserver2/hiveserver2-test.cc index 7022ff017f48e..a7749161c4676 100644 --- a/cpp/src/arrow/dbi/hiveserver2/hiveserver2-test.cc +++ b/cpp/src/arrow/dbi/hiveserver2/hiveserver2-test.cc @@ -97,10 +97,8 @@ Status Wait(const std::unique_ptr& op, if (op_state == state) { return Status::OK(); } else { - std::stringstream ss; - ss << "Failed to reach state '" << OperationStateToString(state) << "' after " - << retries << " retries."; - return Status::IOError(ss.str()); + return Status::IOError("Failed to reach state '", OperationStateToString(state), + "' after ", retries, " retries"); } } diff --git a/cpp/src/arrow/dbi/hiveserver2/service.cc b/cpp/src/arrow/dbi/hiveserver2/service.cc index e2d3f2a21bf37..502a8a284b86f 100644 --- a/cpp/src/arrow/dbi/hiveserver2/service.cc +++ b/cpp/src/arrow/dbi/hiveserver2/service.cc @@ -92,9 +92,7 @@ Service::Service(const string& host, int port, int conn_timeout, Status Service::Open() { if (impl_->protocol_version < hs2::TProtocolVersion::HIVE_CLI_SERVICE_PROTOCOL_V6) { - std::stringstream ss; - ss << "Unsupported protocol: " << impl_->protocol_version; - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("Unsupported protocol: ", impl_->protocol_version); } impl_->socket.reset(new TSocket(host_, port_)); diff --git a/cpp/src/arrow/dbi/hiveserver2/thrift-internal.cc b/cpp/src/arrow/dbi/hiveserver2/thrift-internal.cc index d154e143ba290..171eae36816e0 100644 --- a/cpp/src/arrow/dbi/hiveserver2/thrift-internal.cc +++ b/cpp/src/arrow/dbi/hiveserver2/thrift-internal.cc @@ -204,11 +204,7 @@ Status TStatusToStatus(const hs2::TStatus& tstatus) { return Status::IOError(tstatus.errorMessage); case hs2::TStatusCode::INVALID_HANDLE_STATUS: return Status::Invalid("Invalid handle"); - default: { - std::stringstream ss; - ss << "Unknown TStatusCode " << tstatus.statusCode; - return Status::UnknownError(ss.str()); - } + default: { return Status::UnknownError("Unknown TStatusCode ", tstatus.statusCode); } } } diff --git a/cpp/src/arrow/dbi/hiveserver2/thrift/CMakeLists.txt b/cpp/src/arrow/dbi/hiveserver2/thrift/CMakeLists.txt index be689f935c95c..ed90fe8f9e0d6 100644 --- a/cpp/src/arrow/dbi/hiveserver2/thrift/CMakeLists.txt +++ b/cpp/src/arrow/dbi/hiveserver2/thrift/CMakeLists.txt @@ -78,7 +78,7 @@ endfunction(HS2_THRIFT_GEN) message("Using Thrift compiler: ${THRIFT_COMPILER}") -set(OUTPUT_DIR ${CMAKE_BINARY_DIR}/src) +set(OUTPUT_DIR ${ARROW_BINARY_DIR}/src) file(MAKE_DIRECTORY ${OUTPUT_DIR}) add_custom_command(OUTPUT 
${CMAKE_CURRENT_BINARY_DIR}/ErrorCodes.thrift diff --git a/cpp/src/arrow/flight/CMakeLists.txt b/cpp/src/arrow/flight/CMakeLists.txt index bc22d60b7131a..b8b4d8d336365 100644 --- a/cpp/src/arrow/flight/CMakeLists.txt +++ b/cpp/src/arrow/flight/CMakeLists.txt @@ -18,24 +18,26 @@ add_custom_target(arrow_flight) # Header files -install(FILES - api.h - client.h - server.h - types.h - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow/flight") +ARROW_INSTALL_ALL_HEADERS("arrow/flight") SET(ARROW_FLIGHT_STATIC_LINK_LIBS - grpc_grpcpp - grpc_grpc - grpc_gpr - grpc_address_sorting - cares) + grpc_grpcpp_static + grpc_grpc_static + grpc_gpr_static + grpc_address_sorting_static + cares_static) + +SET(ARROW_FLIGHT_TEST_STATIC_LINK_LIBS + arrow_static + arrow_flight_static + arrow_testing_static + ${ARROW_FLIGHT_STATIC_LINK_LIBS} + ${PROTOBUF_LIBRARY}) # TODO(wesm): Protobuf shared vs static linking -set(FLIGHT_PROTO_PATH "${CMAKE_SOURCE_DIR}/../format") -set(FLIGHT_PROTO ${CMAKE_SOURCE_DIR}/../format/Flight.proto) +set(FLIGHT_PROTO_PATH "${ARROW_SOURCE_DIR}/../format") +set(FLIGHT_PROTO ${ARROW_SOURCE_DIR}/../format/Flight.proto) set(FLIGHT_GENERATED_PROTO_FILES "${CMAKE_CURRENT_BINARY_DIR}/Flight.pb.cc" @@ -43,14 +45,7 @@ set(FLIGHT_GENERATED_PROTO_FILES "${CMAKE_CURRENT_BINARY_DIR}/Flight.grpc.pb.cc" "${CMAKE_CURRENT_BINARY_DIR}/Flight.grpc.pb.h") -if(PROTOBUF_VENDORED) - set(PROTO_DEPENDS ${FLIGHT_PROTO} protobuf) -else() - set(PROTO_DEPENDS ${FLIGHT_PROTO}) -endif() - -# Get location of grpc_cpp_plugin so we can pass it to protoc -get_property(GRPC_CPP_PLUGIN TARGET gRPC::grpc_cpp_plugin PROPERTY LOCATION) +set(PROTO_DEPENDS ${FLIGHT_PROTO} ${PROTOBUF_LIBRARY} grpc) add_custom_command( OUTPUT ${FLIGHT_GENERATED_PROTO_FILES} @@ -80,26 +75,36 @@ set(ARROW_FLIGHT_SRCS ADD_ARROW_LIB(arrow_flight SOURCES ${ARROW_FLIGHT_SRCS} - DEPENDENCIES arrow_dependencies SHARED_LINK_LIBS arrow_shared ${ARROW_FLIGHT_STATIC_LINK_LIBS} STATIC_LINK_LIBS arrow_static ${ARROW_FLIGHT_STATIC_LINK_LIBS}) ADD_ARROW_TEST(flight-test - EXTRA_LINK_LIBS arrow_flight_static ${ARROW_FLIGHT_STATIC_LINK_LIBS} + EXTRA_LINK_LIBS ${ARROW_FLIGHT_TEST_STATIC_LINK_LIBS} LABELS "arrow_flight") # Build test server for unit tests or benchmarks if (ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS) add_executable(flight-test-server test-server.cc) target_link_libraries(flight-test-server - arrow_flight_static - ${ARROW_FLIGHT_STATIC_LINK_LIBS} + ${ARROW_FLIGHT_TEST_STATIC_LINK_LIBS} + gflags_static + ${GTEST_LIBRARY}) + + add_executable(flight-test-integration-server test-integration-server.cc) + target_link_libraries(flight-test-integration-server + ${ARROW_FLIGHT_TEST_STATIC_LINK_LIBS} + gflags_static + gtest_static) + + add_executable(flight-test-integration-client test-integration-client.cc) + target_link_libraries(flight-test-integration-client + ${ARROW_FLIGHT_TEST_STATIC_LINK_LIBS} gflags_static gtest_static) # This is needed for the unit tests if (ARROW_BUILD_TESTS) - add_dependencies(flight-test flight-test-server) + add_dependencies(arrow-flight-test flight-test-server) endif() endif() @@ -122,18 +127,20 @@ if (ARROW_BUILD_BENCHMARKS) perf.pb.cc) target_link_libraries(flight-perf-server arrow_flight_static + arrow_testing_static ${ARROW_FLIGHT_STATIC_LINK_LIBS} gflags_static - gtest_static) + ${GTEST_LIBRARY}) add_executable(flight-benchmark flight-benchmark.cc perf.pb.cc) target_link_libraries(flight-benchmark arrow_flight_static + arrow_testing_static ${ARROW_FLIGHT_STATIC_LINK_LIBS} gflags_static - gtest_static) + ${GTEST_LIBRARY}) 
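# The benchmark exercises flight-perf-server, so make sure it is built first: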
add_dependencies(flight-benchmark flight-perf-server) endif(ARROW_BUILD_BENCHMARKS) diff --git a/cpp/src/arrow/flight/client.cc b/cpp/src/arrow/flight/client.cc index 94c4928d0220d..e25c1875d669f 100644 --- a/cpp/src/arrow/flight/client.cc +++ b/cpp/src/arrow/flight/client.cc @@ -232,7 +232,16 @@ class FlightStreamReader : public RecordBatchReader { // Validate IPC message RETURN_NOT_OK(ipc::Message::Open(data.metadata, data.body, &message)); - return ipc::ReadRecordBatch(*message, schema_, out); + // The first message is a schema; read it and then try to read a + // record batch. + if (message->type() == ipc::Message::Type::SCHEMA) { + RETURN_NOT_OK(ipc::ReadSchema(*message, &schema_)); + return ReadNext(out); + } else if (message->type() == ipc::Message::Type::RECORD_BATCH) { + return ipc::ReadRecordBatch(*message, schema_, out); + } else { + return Status(StatusCode::Invalid, "Unrecognized message in Flight stream"); + } } else { // Stream is completed stream_finished_ = true; diff --git a/cpp/src/arrow/flight/client.h b/cpp/src/arrow/flight/client.h index be3d86a0dde77..53bb1755b2995 100644 --- a/cpp/src/arrow/flight/client.h +++ b/cpp/src/arrow/flight/client.h @@ -87,7 +87,7 @@ class ARROW_EXPORT FlightClient { /// \brief Given a flight ticket and schema, request to be sent the /// stream. Returns record batch stream reader /// \param[in] ticket - /// \param[in] schema the arrow::Schema for the stream as computed by + /// \param[in] schema the schema of the stream data as computed by /// GetFlightInfo /// \param[out] stream the returned RecordBatchReader /// \return Status @@ -96,6 +96,7 @@ class ARROW_EXPORT FlightClient { /// \brief Initiate DoPut RPC, returns FlightPutWriter interface to /// write. Not yet implemented + /// \param[in] schema the schema of the stream data /// \param[out] stream the created stream to write record batches to /// \return Status Status DoPut(const Schema& schema, std::unique_ptr* stream); diff --git a/cpp/src/arrow/flight/flight-test.cc b/cpp/src/arrow/flight/flight-test.cc index 2d1b2f8477d9a..0389c76adb811 100644 --- a/cpp/src/arrow/flight/flight-test.cc +++ b/cpp/src/arrow/flight/flight-test.cc @@ -53,11 +53,11 @@ namespace arrow { namespace flight { TEST(TestFlight, StartStopTestServer) { - TestServer server("flight-test-server", 92385); + TestServer server("flight-test-server", 30000); server.Start(); ASSERT_TRUE(server.IsRunning()); - sleep_for(0.2); + std::this_thread::sleep_for(std::chrono::duration(0.2)); ASSERT_TRUE(server.IsRunning()); int exit_code = server.Stop(); @@ -79,7 +79,7 @@ class TestFlightClient : public ::testing::Test { // void TearDown() {} void SetUp() { - port_ = 92358; + port_ = 30000; server_.reset(new TestServer("flight-test-server", port_)); server_->Start(); ASSERT_OK(ConnectClient()); diff --git a/cpp/src/arrow/flight/internal.cc b/cpp/src/arrow/flight/internal.cc index 796e6095cdb7f..b4c6b2addcc11 100644 --- a/cpp/src/arrow/flight/internal.cc +++ b/cpp/src/arrow/flight/internal.cc @@ -37,16 +37,13 @@ Status FromGrpcStatus(const grpc::Status& grpc_status) { if (grpc_status.ok()) { return Status::OK(); } - std::stringstream ss; if (grpc_status.error_code() == grpc::StatusCode::UNIMPLEMENTED) { - ss << "gRPC returned unimplemented error, with message: " - << grpc_status.error_message(); - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("gRPC returned unimplemented error, with message: ", + grpc_status.error_message()); } else { - ss << "gRPC failed with error code " << grpc_status.error_code() - << 
" and message: " << grpc_status.error_message(); - return Status::IOError(ss.str()); + return Status::IOError("gRPC failed with error code ", grpc_status.error_code(), + " and message: ", grpc_status.error_message()); } } diff --git a/cpp/src/arrow/flight/perf-server.cc b/cpp/src/arrow/flight/perf-server.cc index ce2ec7bca6cff..add544276f529 100644 --- a/cpp/src/arrow/flight/perf-server.cc +++ b/cpp/src/arrow/flight/perf-server.cc @@ -69,6 +69,8 @@ class PerfDataStream : public FlightDataStream { batch_ = RecordBatch::Make(schema, batch_length_, arrays_); } + std::shared_ptr schema() override { return schema_; } + Status Next(IpcPayload* payload) override { if (records_sent_ >= total_records_) { // Signal that iteration is over diff --git a/cpp/src/arrow/flight/server.cc b/cpp/src/arrow/flight/server.cc index 46815b5476c67..018c079501f2f 100644 --- a/cpp/src/arrow/flight/server.cc +++ b/cpp/src/arrow/flight/server.cc @@ -102,6 +102,10 @@ class SerializationTraits { int64_t body_size = 0; for (const auto& buffer : msg.body_buffers) { + // Buffer may be null when the row length is zero, or when all + // entries are invalid. + if (!buffer) continue; + body_size += buffer->size(); const int64_t remainder = buffer->size() % 8; @@ -111,7 +115,11 @@ class SerializationTraits { } // 2 bytes for body tag - total_size += 2 + WireFormatLite::LengthDelimitedSize(static_cast(body_size)); + // Only written when there are body buffers + if (msg.body_length > 0) { + total_size += + 2 + WireFormatLite::LengthDelimitedSize(static_cast(body_size)); + } // TODO(wesm): messages over 2GB unlikely to be yet supported if (total_size > kInt32Max) { @@ -135,20 +143,27 @@ class SerializationTraits { pb_stream.WriteRawMaybeAliased(msg.metadata->data(), static_cast(msg.metadata->size())); - // Write body - WireFormatLite::WriteTag(pb::FlightData::kDataBodyFieldNumber, - WireFormatLite::WIRETYPE_LENGTH_DELIMITED, &pb_stream); - pb_stream.WriteVarint32(static_cast(body_size)); + // Don't write tag if there are no body buffers + if (msg.body_length > 0) { + // Write body + WireFormatLite::WriteTag(pb::FlightData::kDataBodyFieldNumber, + WireFormatLite::WIRETYPE_LENGTH_DELIMITED, &pb_stream); + pb_stream.WriteVarint32(static_cast(body_size)); - constexpr uint8_t kPaddingBytes[8] = {0}; + constexpr uint8_t kPaddingBytes[8] = {0}; - for (const auto& buffer : msg.body_buffers) { - pb_stream.WriteRawMaybeAliased(buffer->data(), static_cast(buffer->size())); + for (const auto& buffer : msg.body_buffers) { + // Buffer may be null when the row length is zero, or when all + // entries are invalid. 
+ if (!buffer) continue; - // Write padding if not multiple of 8 - const int remainder = static_cast(buffer->size() % 8); - if (remainder) { - pb_stream.WriteRawMaybeAliased(kPaddingBytes, 8 - remainder); + pb_stream.WriteRawMaybeAliased(buffer->data(), static_cast(buffer->size())); + + // Write padding if not multiple of 8 + const int remainder = static_cast(buffer->size() % 8); + if (remainder) { + pb_stream.WriteRawMaybeAliased(kPaddingBytes, 8 - remainder); + } } } @@ -255,6 +270,14 @@ class FlightServiceImpl : public FlightService::Service { // Requires ServerWriter customization in grpc_customizations.h auto custom_writer = reinterpret_cast*>(writer); + // Write the schema as the first message in the stream + IpcPayload schema_payload; + MemoryPool* pool = default_memory_pool(); + ipc::DictionaryMemo dictionary_memo; + GRPC_RETURN_NOT_OK(ipc::internal::GetSchemaPayload( + *data_stream->schema(), pool, &dictionary_memo, &schema_payload)); + custom_writer->Write(schema_payload, grpc::WriteOptions()); + while (true) { IpcPayload payload; GRPC_RETURN_NOT_OK(data_stream->Next(&payload)); @@ -368,6 +391,8 @@ Status FlightServerBase::ListActions(std::vector* actions) { RecordBatchStream::RecordBatchStream(const std::shared_ptr& reader) : pool_(default_memory_pool()), reader_(reader) {} +std::shared_ptr RecordBatchStream::schema() { return reader_->schema(); } + Status RecordBatchStream::Next(IpcPayload* payload) { std::shared_ptr batch; RETURN_NOT_OK(reader_->ReadNext(&batch)); diff --git a/cpp/src/arrow/flight/server.h b/cpp/src/arrow/flight/server.h index 89154ac8623e0..b3b8239132b7a 100644 --- a/cpp/src/arrow/flight/server.h +++ b/cpp/src/arrow/flight/server.h @@ -28,6 +28,7 @@ #include "arrow/util/visibility.h" #include "arrow/flight/types.h" +#include "arrow/ipc/dictionary.h" namespace arrow { @@ -57,6 +58,9 @@ class ARROW_EXPORT FlightDataStream { public: virtual ~FlightDataStream() = default; + // When the stream starts, send the schema. + virtual std::shared_ptr schema() = 0; + // When the stream is completed, the last payload written will have null // metadata virtual Status Next(ipc::internal::IpcPayload* payload) = 0; @@ -69,6 +73,7 @@ class ARROW_EXPORT RecordBatchStream : public FlightDataStream { public: explicit RecordBatchStream(const std::shared_ptr& reader); + std::shared_ptr schema() override; Status Next(ipc::internal::IpcPayload* payload) override; private: @@ -115,7 +120,7 @@ class ARROW_EXPORT FlightServerBase { std::unique_ptr* info); /// \brief Get a stream of IPC payloads to put on the wire - /// \param[in] ticket an opaque ticket + /// \param[in] request an opaque ticket /// \param[out] stream the returned stream provider /// \return Status virtual Status DoGet(const Ticket& request, std::unique_ptr* stream); diff --git a/cpp/src/arrow/flight/test-integration-client.cc b/cpp/src/arrow/flight/test-integration-client.cc new file mode 100644 index 0000000000000..267025a451cc7 --- /dev/null +++ b/cpp/src/arrow/flight/test-integration-client.cc @@ -0,0 +1,82 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Client implementation for Flight integration testing. Requests the given
+// path from the Flight server, which reads that file and sends it as a stream
+// to the client. The client writes the server stream to the IPC file format at
+// the given output file path. The integration test script then uses the
+// existing integration test tools to compare the output binary with the
+// original JSON
+
+#include <iostream>
+#include <memory>
+#include <string>
+
+#include <gflags/gflags.h>
+
+#include "arrow/io/test-common.h"
+#include "arrow/ipc/json.h"
+#include "arrow/record_batch.h"
+
+#include "arrow/flight/server.h"
+#include "arrow/flight/test-util.h"
+
+DEFINE_string(host, "localhost", "Server host to connect to");
+DEFINE_int32(port, 31337, "Server port to connect to");
+DEFINE_string(path, "", "Resource path to request");
+DEFINE_string(output, "", "Where to write requested resource");
+
+int main(int argc, char** argv) {
+  gflags::SetUsageMessage("Integration testing client for Flight.");
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+
+  std::unique_ptr<arrow::flight::FlightClient> client;
+  ABORT_NOT_OK(arrow::flight::FlightClient::Connect(FLAGS_host, FLAGS_port, &client));
+
+  arrow::flight::FlightDescriptor descr{
+      arrow::flight::FlightDescriptor::PATH, "", {FLAGS_path}};
+  std::unique_ptr<arrow::flight::FlightInfo> info;
+  ABORT_NOT_OK(client->GetFlightInfo(descr, &info));
+
+  std::shared_ptr<arrow::Schema> schema;
+  ABORT_NOT_OK(info->GetSchema(&schema));
+
+  if (info->endpoints().size() == 0) {
+    std::cerr << "No endpoints returned from Flight server." << std::endl;
+    return -1;
+  }
+
+  arrow::flight::Ticket ticket = info->endpoints()[0].ticket;
+  std::unique_ptr<arrow::RecordBatchReader> stream;
+  ABORT_NOT_OK(client->DoGet(ticket, schema, &stream));
+
+  std::shared_ptr<arrow::io::FileOutputStream> out_file;
+  ABORT_NOT_OK(arrow::io::FileOutputStream::Open(FLAGS_output, &out_file));
+  std::shared_ptr<arrow::ipc::RecordBatchWriter> writer;
+  ABORT_NOT_OK(arrow::ipc::RecordBatchFileWriter::Open(out_file.get(), schema, &writer));
+
+  std::shared_ptr<arrow::RecordBatch> chunk;
+  while (true) {
+    ABORT_NOT_OK(stream->ReadNext(&chunk));
+    if (chunk == nullptr) break;
+    ABORT_NOT_OK(writer->WriteRecordBatch(*chunk));
+  }
+
+  ABORT_NOT_OK(writer->Close());
+
+  return 0;
+}
diff --git a/cpp/src/arrow/flight/test-integration-server.cc b/cpp/src/arrow/flight/test-integration-server.cc
new file mode 100644
index 0000000000000..80813e7f19a4c
--- /dev/null
+++ b/cpp/src/arrow/flight/test-integration-server.cc
@@ -0,0 +1,150 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Example server implementation for integration testing purposes
+
+#include <signal.h>
+#include <iostream>
+#include <memory>
+#include <string>
+
+#include <gflags/gflags.h>
+
+#include "arrow/io/test-common.h"
+#include "arrow/ipc/json.h"
+#include "arrow/record_batch.h"
+
+#include "arrow/flight/server.h"
+#include "arrow/flight/test-util.h"
+
+DEFINE_int32(port, 31337, "Server port to listen on");
+
+namespace arrow {
+namespace flight {
+
+class JsonReaderRecordBatchStream : public FlightDataStream {
+ public:
+  explicit JsonReaderRecordBatchStream(
+      std::unique_ptr<ipc::internal::json::JsonReader>&& reader)
+      : index_(0), pool_(default_memory_pool()), reader_(std::move(reader)) {}
+
+  std::shared_ptr<Schema> schema() override { return reader_->schema(); }
+
+  Status Next(ipc::internal::IpcPayload* payload) override {
+    if (index_ >= reader_->num_record_batches()) {
+      // Signal that iteration is over
+      payload->metadata = nullptr;
+      return Status::OK();
+    }
+
+    std::shared_ptr<RecordBatch> batch;
+    RETURN_NOT_OK(reader_->ReadRecordBatch(index_, &batch));
+    index_++;
+
+    if (!batch) {
+      // Signal that iteration is over
+      payload->metadata = nullptr;
+      return Status::OK();
+    } else {
+      return ipc::internal::GetRecordBatchPayload(*batch, pool_, payload);
+    }
+  }
+
+ private:
+  int index_;
+  MemoryPool* pool_;
+  std::unique_ptr<ipc::internal::json::JsonReader> reader_;
+};
+
+class FlightIntegrationTestServer : public FlightServerBase {
+  Status ReadJson(const std::string& json_path,
+                  std::unique_ptr<ipc::internal::json::JsonReader>* out) {
+    std::shared_ptr<io::ReadableFile> in_file;
+    std::cout << "Opening JSON file '" << json_path << "'" << std::endl;
+    RETURN_NOT_OK(io::ReadableFile::Open(json_path, &in_file));
+
+    int64_t file_size = 0;
+    RETURN_NOT_OK(in_file->GetSize(&file_size));
+
+    std::shared_ptr<Buffer> json_buffer;
+    RETURN_NOT_OK(in_file->Read(file_size, &json_buffer));
+
+    RETURN_NOT_OK(arrow::ipc::internal::json::JsonReader::Open(json_buffer, out));
+    return Status::OK();
+  }
+
+  Status GetFlightInfo(const FlightDescriptor& request,
+                       std::unique_ptr<FlightInfo>* info) override {
+    if (request.type == FlightDescriptor::PATH) {
+      if (request.path.size() == 0) {
+        return Status::Invalid("Invalid path");
+      }
+
+      std::unique_ptr<ipc::internal::json::JsonReader> reader;
+      RETURN_NOT_OK(ReadJson(request.path.back(), &reader));
+
+      FlightEndpoint endpoint1({{request.path.back()}, {}});
+
+      FlightInfo::Data flight_data;
+      RETURN_NOT_OK(internal::SchemaToString(*reader->schema(), &flight_data.schema));
+      flight_data.descriptor = request;
+      flight_data.endpoints = {endpoint1};
+      flight_data.total_records = reader->num_record_batches();
+      flight_data.total_bytes = -1;
+      FlightInfo value(flight_data);
+
+      *info = std::unique_ptr<FlightInfo>(new FlightInfo(value));
+      return Status::OK();
+    } else {
+      return Status::NotImplemented(request.type);
+    }
+  }
+
+  Status DoGet(const Ticket& request,
+               std::unique_ptr<FlightDataStream>* data_stream) override {
+    std::unique_ptr<ipc::internal::json::JsonReader> reader;
+    RETURN_NOT_OK(ReadJson(request.ticket, &reader));
+
+    *data_stream = std::unique_ptr<FlightDataStream>(
+        new JsonReaderRecordBatchStream(std::move(reader)));
+
+    return Status::OK();
+  }
+};
+
+}  // namespace flight
+}  // namespace arrow
+
+std::unique_ptr<arrow::flight::FlightIntegrationTestServer> g_server;
+
+void Shutdown(int signal) {
+  if (g_server != nullptr) {
+    g_server->Shutdown();
+  }
+}
+
+int main(int argc, char** argv) {
+  gflags::SetUsageMessage("Integration testing server for Flight.");
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+
+  // SIGTERM shuts down the server
+  signal(SIGTERM, Shutdown);
+
+  g_server.reset(new arrow::flight::FlightIntegrationTestServer);
+  g_server->Run(FLAGS_port);
+  return 0;
+}
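JsonReaderRecordBatchStream above doubles as a reference implementation of the new FlightDataStream contract: expose the schema up front, emit one IpcPayload per record batch from Next(), and signal end-of-stream with null metadata. Below is a minimal sketch of the same contract for a single in-memory batch; OneShotStream is a hypothetical name, and only the IPC payload helpers visible in this diff are assumed.

// Hypothetical single-batch stream; illustrates the schema()/Next()
// protocol of FlightDataStream, not an API that exists in Arrow.
class OneShotStream : public arrow::flight::FlightDataStream {
 public:
  explicit OneShotStream(std::shared_ptr<arrow::RecordBatch> batch)
      : batch_(std::move(batch)), done_(false) {}

  // Served as the first message of a DoGet stream (see server.cc above).
  std::shared_ptr<arrow::Schema> schema() override { return batch_->schema(); }

  arrow::Status Next(arrow::ipc::internal::IpcPayload* payload) override {
    if (done_) {
      payload->metadata = nullptr;  // null metadata ends the stream
      return arrow::Status::OK();
    }
    done_ = true;
    return arrow::ipc::internal::GetRecordBatchPayload(
        *batch_, arrow::default_memory_pool(), payload);
  }

 private:
  std::shared_ptr<arrow::RecordBatch> batch_;
  bool done_;
};

Sending the schema eagerly is what lets a client construct its record batch reader before the first data message arrives, which is why DoGet in server.cc now writes a schema payload ahead of the loop.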
diff --git a/cpp/src/arrow/flight/types.cc b/cpp/src/arrow/flight/types.cc index 8c7588d03bd38..aba93ad68fa1e 100644 --- a/cpp/src/arrow/flight/types.cc +++ b/cpp/src/arrow/flight/types.cc @@ -36,8 +36,7 @@ Status FlightInfo::GetSchema(std::shared_ptr* out) const { } /// XXX(wesm): arrow::ipc::ReadSchema in its current form will not suffice /// for reading schemas with dictionaries. See ARROW-3144 - io::BufferReader schema_reader(reinterpret_cast(data_.schema.c_str()), - static_cast(data_.schema.size())); + io::BufferReader schema_reader(data_.schema); RETURN_NOT_OK(ipc::ReadSchema(&schema_reader, &schema_)); reconstructed_schema_ = true; *out = schema_; diff --git a/cpp/src/arrow/gpu/CMakeLists.txt b/cpp/src/arrow/gpu/CMakeLists.txt index ed4c125297771..204cb5e313f3f 100644 --- a/cpp/src/arrow/gpu/CMakeLists.txt +++ b/cpp/src/arrow/gpu/CMakeLists.txt @@ -16,9 +16,15 @@ # under the License. ####################################### -# arrow_gpu +# arrow_cuda ####################################### +add_custom_target(arrow_cuda-all) +add_custom_target(arrow_cuda) +add_custom_target(arrow_cuda-benchmarks) +add_custom_target(arrow_cuda-tests) +add_dependencies(arrow_cuda-all arrow_cuda arrow_cuda-tests arrow_cuda-benchmarks) + if (DEFINED ENV{CUDA_HOME}) set(CUDA_TOOLKIT_ROOT_DIR "$ENV{CUDA_HOME}") endif() @@ -28,28 +34,30 @@ include_directories(SYSTEM ${CUDA_INCLUDE_DIRS}) message(STATUS "CUDA Libraries: ${CUDA_LIBRARIES}") -set(ARROW_GPU_SRCS +set(ARROW_CUDA_SRCS cuda_arrow_ipc.cc cuda_context.cc cuda_memory.cc ) -set(ARROW_GPU_SHARED_LINK_LIBS +set(ARROW_CUDA_SHARED_LINK_LIBS ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY} ) -ADD_ARROW_LIB(arrow_gpu - SOURCES ${ARROW_GPU_SRCS} - OUTPUTS ARROW_GPU_LIBRARIES +ADD_ARROW_LIB(arrow_cuda + SOURCES ${ARROW_CUDA_SRCS} + OUTPUTS ARROW_CUDA_LIBRARIES DEPENDENCIES metadata_fbs SHARED_LINK_FLAGS "" - SHARED_LINK_LIBS arrow_shared ${ARROW_GPU_SHARED_LINK_LIBS} - # Static arrow_gpu must also link against CUDA shared libs - STATIC_LINK_LIBS ${ARROW_GPU_SHARED_LINK_LIBS} + SHARED_LINK_LIBS arrow_shared ${ARROW_CUDA_SHARED_LINK_LIBS} + # Static arrow_cuda must also link against CUDA shared libs + STATIC_LINK_LIBS ${ARROW_CUDA_SHARED_LINK_LIBS} ) -foreach(LIB_TARGET ${ARROW_GPU_LIBRARIES}) +add_dependencies(arrow_cuda ${ARROW_CUDA_LIBRARIES}) + +foreach(LIB_TARGET ${ARROW_CUDA_LIBRARIES}) target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_EXPORTING) endforeach() @@ -63,36 +71,24 @@ install(FILES "${CMAKE_CURRENT_BINARY_DIR}/cuda_version.h" DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow/gpu") -install(FILES - cuda_api.h - cuda_arrow_ipc.h - cuda_context.h - cuda_memory.h - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow/gpu") - -# pkg-config support -configure_file(arrow-gpu.pc.in - "${CMAKE_CURRENT_BINARY_DIR}/arrow-gpu.pc" - @ONLY) - -install( - FILES "${CMAKE_CURRENT_BINARY_DIR}/arrow-gpu.pc" - DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig/") +ARROW_INSTALL_ALL_HEADERS("arrow/gpu") +ARROW_ADD_PKG_CONFIG("arrow-cuda") -set(ARROW_GPU_TEST_LINK_LIBS - arrow_gpu_shared +set(ARROW_CUDA_TEST_LINK_LIBS + arrow_cuda_shared ${ARROW_TEST_LINK_LIBS}) if (ARROW_BUILD_TESTS) ADD_ARROW_TEST(cuda-test - STATIC_LINK_LIBS ${ARROW_GPU_TEST_LINK_LIBS} + STATIC_LINK_LIBS ${ARROW_CUDA_TEST_LINK_LIBS} NO_VALGRIND) endif() if (ARROW_BUILD_BENCHMARKS) - cuda_add_executable(cuda-benchmark cuda-benchmark.cc) - target_link_libraries(cuda-benchmark - arrow_gpu_shared - gtest_static + cuda_add_executable(arrow-cuda-benchmark cuda-benchmark.cc) + target_link_libraries(arrow-cuda-benchmark + 
arrow_cuda_shared + ${GTEST_LIBRARY} ${ARROW_BENCHMARK_LINK_LIBS}) + add_dependencies(arrow_cuda-benchmarks arrow-cuda-benchmark) endif() diff --git a/cpp/src/arrow/gpu/arrow-gpu.pc.in b/cpp/src/arrow/gpu/arrow-cuda.pc.in similarity index 89% rename from cpp/src/arrow/gpu/arrow-gpu.pc.in rename to cpp/src/arrow/gpu/arrow-cuda.pc.in index 3889d03b204ca..858096f892270 100644 --- a/cpp/src/arrow/gpu/arrow-gpu.pc.in +++ b/cpp/src/arrow/gpu/arrow-cuda.pc.in @@ -18,9 +18,9 @@ libdir=@CMAKE_INSTALL_FULL_LIBDIR@ includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ -Name: Apache Arrow GPU -Description: GPU integration library for Apache Arrow +Name: Apache Arrow CUDA +Description: CUDA integration library for Apache Arrow Version: @ARROW_VERSION@ Requires: arrow -Libs: -L${libdir} -larrow_gpu +Libs: -L${libdir} -larrow_cuda Cflags: -I${includedir} diff --git a/cpp/src/arrow/gpu/cuda-benchmark.cc b/cpp/src/arrow/gpu/cuda-benchmark.cc index 8b3723d838797..9889373d09c75 100644 --- a/cpp/src/arrow/gpu/cuda-benchmark.cc +++ b/cpp/src/arrow/gpu/cuda-benchmark.cc @@ -28,7 +28,7 @@ #include "arrow/gpu/cuda_api.h" namespace arrow { -namespace gpu { +namespace cuda { constexpr int64_t kGpuNumber = 0; @@ -94,5 +94,5 @@ BENCHMARK(BM_Writer_Unbuffered) ->MinTime(1.0) ->UseRealTime(); -} // namespace gpu +} // namespace cuda } // namespace arrow diff --git a/cpp/src/arrow/gpu/cuda-test.cc b/cpp/src/arrow/gpu/cuda-test.cc index cb375458004a0..628d0f2774a20 100644 --- a/cpp/src/arrow/gpu/cuda-test.cc +++ b/cpp/src/arrow/gpu/cuda-test.cc @@ -29,7 +29,7 @@ #include "arrow/gpu/cuda_api.h" namespace arrow { -namespace gpu { +namespace cuda { constexpr int kGpuNumber = 0; @@ -323,7 +323,7 @@ TEST_F(TestCudaArrowIpc, BasicWriteRead) { ASSERT_OK(ipc::MakeIntRecordBatch(&batch)); std::shared_ptr device_serialized; - ASSERT_OK(arrow::gpu::SerializeRecordBatch(*batch, context_.get(), &device_serialized)); + ASSERT_OK(SerializeRecordBatch(*batch, context_.get(), &device_serialized)); // Test that ReadRecordBatch works properly std::shared_ptr device_batch; @@ -343,5 +343,19 @@ TEST_F(TestCudaArrowIpc, BasicWriteRead) { CompareBatch(*batch, *cpu_batch); } -} // namespace gpu +class TestCudaContext : public TestCudaBufferBase { + public: + void SetUp() { TestCudaBufferBase::SetUp(); } +}; + +TEST_F(TestCudaContext, GetDeviceAddress) { + const int64_t kSize = 100; + std::shared_ptr buffer; + uint8_t* devptr = NULL; + ASSERT_OK(context_->Allocate(kSize, &buffer)); + ASSERT_OK(context_->GetDeviceAddress(buffer.get()->mutable_data(), &devptr)); + ASSERT_EQ(buffer.get()->mutable_data(), devptr); +} + +} // namespace cuda } // namespace arrow diff --git a/cpp/src/arrow/gpu/cuda_arrow_ipc.cc b/cpp/src/arrow/gpu/cuda_arrow_ipc.cc index a7262c8b4d4ba..b4d8744cb0bd0 100644 --- a/cpp/src/arrow/gpu/cuda_arrow_ipc.cc +++ b/cpp/src/arrow/gpu/cuda_arrow_ipc.cc @@ -38,7 +38,7 @@ namespace arrow { namespace flatbuf = org::apache::arrow::flatbuf; -namespace gpu { +namespace cuda { Status SerializeRecordBatch(const RecordBatch& batch, CudaContext* ctx, std::shared_ptr* out) { @@ -82,9 +82,8 @@ Status ReadMessage(CudaBufferReader* reader, MemoryPool* pool, RETURN_NOT_OK(AllocateBuffer(pool, message_length, &metadata)); RETURN_NOT_OK(reader->Read(message_length, &bytes_read, metadata->mutable_data())); if (bytes_read != message_length) { - std::stringstream ss; - ss << "Expected " << message_length << " metadata bytes, but only got " << bytes_read; - return Status::IOError(ss.str()); + return Status::IOError("Expected ", message_length, " metadata bytes, 
but only got ", + bytes_read); } return ipc::Message::ReadFrom(metadata, reader, out); @@ -106,5 +105,5 @@ Status ReadRecordBatch(const std::shared_ptr& schema, return ipc::ReadRecordBatch(*message, schema, out); } -} // namespace gpu +} // namespace cuda } // namespace arrow diff --git a/cpp/src/arrow/gpu/cuda_arrow_ipc.h b/cpp/src/arrow/gpu/cuda_arrow_ipc.h index 52dd92473eaec..4eb85e797c77b 100644 --- a/cpp/src/arrow/gpu/cuda_arrow_ipc.h +++ b/cpp/src/arrow/gpu/cuda_arrow_ipc.h @@ -39,7 +39,7 @@ class Message; } // namespace ipc -namespace gpu { +namespace cuda { /// \brief Write record batch message to GPU device memory /// \param[in] batch record batch to write @@ -71,7 +71,7 @@ Status ReadRecordBatch(const std::shared_ptr& schema, const std::shared_ptr& buffer, MemoryPool* pool, std::shared_ptr* out); -} // namespace gpu +} // namespace cuda } // namespace arrow #endif // ARROW_GPU_CUDA_ARROW_IPC_H diff --git a/cpp/src/arrow/gpu/cuda_common.h b/cpp/src/arrow/gpu/cuda_common.h index c06c1a21ff481..2b630c8114325 100644 --- a/cpp/src/arrow/gpu/cuda_common.h +++ b/cpp/src/arrow/gpu/cuda_common.h @@ -25,7 +25,7 @@ #include namespace arrow { -namespace gpu { +namespace cuda { #define CUDA_DCHECK(STMT) \ do { \ @@ -34,18 +34,16 @@ namespace gpu { (void)ret; \ } while (0) -#define CU_RETURN_NOT_OK(STMT) \ - do { \ - CUresult ret = (STMT); \ - if (ret != CUDA_SUCCESS) { \ - std::stringstream ss; \ - ss << "Cuda Driver API call in " << __FILE__ << " at line " << __LINE__ \ - << " failed with code " << ret << ": " << #STMT; \ - return Status::IOError(ss.str()); \ - } \ +#define CU_RETURN_NOT_OK(STMT) \ + do { \ + CUresult ret = (STMT); \ + if (ret != CUDA_SUCCESS) { \ + return Status::IOError("Cuda Driver API call in ", __FILE__, " at line ", \ + __LINE__, " failed with code ", ret, ": ", #STMT); \ + } \ } while (0) -} // namespace gpu +} // namespace cuda } // namespace arrow #endif // ARROW_GPU_CUDA_COMMON_H diff --git a/cpp/src/arrow/gpu/cuda_context.cc b/cpp/src/arrow/gpu/cuda_context.cc index 566ae6f878330..2f3f1bd3f10de 100644 --- a/cpp/src/arrow/gpu/cuda_context.cc +++ b/cpp/src/arrow/gpu/cuda_context.cc @@ -28,8 +28,9 @@ #include "arrow/gpu/cuda_common.h" #include "arrow/gpu/cuda_memory.h" + namespace arrow { -namespace gpu { +namespace cuda { struct CudaDevice { int device_num; @@ -342,5 +343,12 @@ void* CudaContext::handle() const { return impl_->context_handle(); } int CudaContext::device_number() const { return impl_->device().device_num; } -} // namespace gpu +Status CudaContext::GetDeviceAddress(uint8_t* addr, uint8_t** devaddr) { + ContextSaver set_temporary(reinterpret_cast(handle())); + CU_RETURN_NOT_OK(cuPointerGetAttribute(devaddr, CU_POINTER_ATTRIBUTE_DEVICE_POINTER, + reinterpret_cast(addr))); + return Status::OK(); +} + +} // namespace cuda } // namespace arrow diff --git a/cpp/src/arrow/gpu/cuda_context.h b/cpp/src/arrow/gpu/cuda_context.h index e59273e5624f7..938a81561d042 100644 --- a/cpp/src/arrow/gpu/cuda_context.h +++ b/cpp/src/arrow/gpu/cuda_context.h @@ -27,7 +27,7 @@ #include "arrow/gpu/cuda_memory.h" namespace arrow { -namespace gpu { +namespace cuda { // Forward declaration class CudaContext; @@ -37,23 +37,23 @@ class ARROW_EXPORT CudaDeviceManager { static Status GetInstance(CudaDeviceManager** manager); /// \brief Get the CUDA driver context for a particular device - /// \param[in] device_number + /// \param[in] device_number the CUDA device /// \param[out] out cached context - Status GetContext(int gpu_number, std::shared_ptr* ctx); + Status GetContext(int 
device_number, std::shared_ptr* out); /// \brief Get the shared CUDA driver context for a particular device - /// \param[in] device_number + /// \param[in] device_number the CUDA device /// \param[in] handle CUDA context handler created by another library /// \param[out] out shared context Status GetSharedContext(int device_number, void* handle, std::shared_ptr* out); /// \brief Allocate host memory with fast access to given GPU device - /// \param[in] device_number + /// \param[in] device_number the CUDA device /// \param[in] nbytes number of bytes /// \param[out] out the allocated buffer Status AllocateHost(int device_number, int64_t nbytes, - std::shared_ptr* buffer); + std::shared_ptr* out); Status FreeHost(void* data, int64_t nbytes); @@ -98,15 +98,15 @@ class ARROW_EXPORT CudaContext : public std::enable_shared_from_this* buffer); + std::shared_ptr* out); /// \brief Close memory mapped with IPC buffer /// \param[in] buffer a CudaBuffer referencing /// \return Status - Status CloseIpcBuffer(CudaBuffer* buf); + Status CloseIpcBuffer(CudaBuffer* buffer); /// \brief Block until the all device tasks are completed. Status Synchronize(void); @@ -119,6 +119,20 @@ class ARROW_EXPORT CudaContext : public std::enable_shared_from_thismutable_data(); size_ = buffer->size(); position_ = 0; + closed_ = false; + } + +#define CHECK_CLOSED() \ + if (closed_) { \ + return Status::Invalid("Operation on closed CudaBufferWriter"); \ } Status Seek(int64_t position) { + CHECK_CLOSED(); if (position < 0 || position >= size_) { return Status::IOError("position out of bounds"); } @@ -234,12 +241,17 @@ class CudaBufferWriter::CudaBufferWriterImpl { Status Close() { if (!closed_) { closed_ = true; - RETURN_NOT_OK(Flush()); + RETURN_NOT_OK(FlushInternal()); } return Status::OK(); } Status Flush() { + CHECK_CLOSED(); + return FlushInternal(); + } + + Status FlushInternal() { if (buffer_size_ > 0 && buffer_position_ > 0) { // Only need to flush when the write has been buffered RETURN_NOT_OK( @@ -253,11 +265,13 @@ class CudaBufferWriter::CudaBufferWriterImpl { bool closed() const { return closed_; } Status Tell(int64_t* position) const { + CHECK_CLOSED(); *position = position_; return Status::OK(); } Status Write(const void* data, int64_t nbytes) { + CHECK_CLOSED(); if (nbytes == 0) { return Status::OK(); } @@ -283,11 +297,13 @@ class CudaBufferWriter::CudaBufferWriterImpl { Status WriteAt(int64_t position, const void* data, int64_t nbytes) { std::lock_guard guard(lock_); + CHECK_CLOSED(); RETURN_NOT_OK(Seek(position)); return Write(data, nbytes); } Status SetBufferSize(const int64_t buffer_size) { + CHECK_CLOSED(); if (buffer_position_ > 0) { // Flush any buffered data RETURN_NOT_OK(Flush()); @@ -303,6 +319,8 @@ class CudaBufferWriter::CudaBufferWriterImpl { int64_t buffer_position() const { return buffer_position_; } +#undef CHECK_CLOSED + private: std::shared_ptr context_; std::shared_ptr buffer_; @@ -365,5 +383,5 @@ Status AllocateCudaHostBuffer(int device_number, const int64_t size, return manager->AllocateHost(device_number, size, out); } -} // namespace gpu +} // namespace cuda } // namespace arrow diff --git a/cpp/src/arrow/gpu/cuda_memory.h b/cpp/src/arrow/gpu/cuda_memory.h index 0da58c170ff24..193deed82e554 100644 --- a/cpp/src/arrow/gpu/cuda_memory.h +++ b/cpp/src/arrow/gpu/cuda_memory.h @@ -27,7 +27,7 @@ #include "arrow/status.h" namespace arrow { -namespace gpu { +namespace cuda { class CudaContext; class CudaIpcMemHandle; @@ -57,7 +57,9 @@ class ARROW_EXPORT CudaBuffer : public Buffer { 
std::shared_ptr* out); /// \brief Copy memory from GPU device to CPU host - /// \param[out] out a pre-allocated output buffer + /// \param[in] position start position inside buffer to copy bytes from + /// \param[in] nbytes number of bytes to copy + /// \param[out] out start address of the host memory area to copy to /// \return Status Status CopyToHost(const int64_t position, const int64_t nbytes, void* out) const; @@ -69,8 +71,8 @@ class ARROW_EXPORT CudaBuffer : public Buffer { Status CopyFromHost(const int64_t position, const void* data, int64_t nbytes); /// \brief Copy memory from device to device at position - /// \param[in] position start position to copy bytes - /// \param[in] data the device data to copy + /// \param[in] position start position inside buffer to copy bytes to + /// \param[in] data start address of the device memory area to copy from /// \param[in] nbytes number of bytes to copy /// \return Status /// @@ -207,7 +209,7 @@ class ARROW_EXPORT CudaBufferWriter : public io::WritableFile { }; /// \brief Allocate CUDA-accessible memory on CPU host -/// \param[in] device_number +/// \param[in] device_number device to expose host memory /// \param[in] size number of bytes /// \param[out] out the allocated buffer /// \return Status @@ -215,7 +217,7 @@ ARROW_EXPORT Status AllocateCudaHostBuffer(int device_number, const int64_t size, std::shared_ptr* out); -} // namespace gpu +} // namespace cuda } // namespace arrow #endif // ARROW_GPU_CUDA_MEMORY_H diff --git a/cpp/src/arrow/io/CMakeLists.txt b/cpp/src/arrow/io/CMakeLists.txt index d21bb16755271..13b577f7d41b2 100644 --- a/cpp/src/arrow/io/CMakeLists.txt +++ b/cpp/src/arrow/io/CMakeLists.txt @@ -18,28 +18,27 @@ # ---------------------------------------------------------------------- # arrow_io : Arrow IO interfaces -ADD_ARROW_TEST(io-buffered-test) -ADD_ARROW_TEST(io-compressed-test) -ADD_ARROW_TEST(io-file-test) +ADD_ARROW_TEST(buffered-test + PREFIX "arrow-io") +ADD_ARROW_TEST(compressed-test + PREFIX "arrow-io") +ADD_ARROW_TEST(file-test + PREFIX "arrow-io") if (ARROW_HDFS AND NOT ARROW_BOOST_HEADER_ONLY) - ADD_ARROW_TEST(io-hdfs-test NO_VALGRIND) + ADD_ARROW_TEST(hdfs-test NO_VALGRIND + PREFIX "arrow-io") endif() -ADD_ARROW_TEST(io-memory-test) -ADD_ARROW_TEST(io-readahead-test) +ADD_ARROW_TEST(memory-test + PREFIX "arrow-io") +ADD_ARROW_TEST(readahead-test + PREFIX "arrow-io") -ADD_ARROW_BENCHMARK(io-file-benchmark) -ADD_ARROW_BENCHMARK(io-memory-benchmark) +ADD_ARROW_BENCHMARK(file-benchmark + PREFIX "arrow-io") +ADD_ARROW_BENCHMARK(memory-benchmark + PREFIX "arrow-io") # Headers: top level -install(FILES - api.h - buffered.h - compressed.h - file.h - hdfs.h - interfaces.h - memory.h - readahead.h - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow/io") +ARROW_INSTALL_ALL_HEADERS("arrow/io") diff --git a/cpp/src/arrow/io/api.h b/cpp/src/arrow/io/api.h index 0d5742ad65864..cf1be337fd1a9 100644 --- a/cpp/src/arrow/io/api.h +++ b/cpp/src/arrow/io/api.h @@ -18,6 +18,7 @@ #ifndef ARROW_IO_API_H #define ARROW_IO_API_H +#include "arrow/io/buffered.h" #include "arrow/io/compressed.h" #include "arrow/io/file.h" #include "arrow/io/hdfs.h" diff --git a/cpp/src/arrow/io/io-buffered-test.cc b/cpp/src/arrow/io/buffered-test.cc similarity index 97% rename from cpp/src/arrow/io/io-buffered-test.cc rename to cpp/src/arrow/io/buffered-test.cc index 7fc4c520d148b..ee07556de16f9 100644 --- a/cpp/src/arrow/io/io-buffered-test.cc +++ b/cpp/src/arrow/io/buffered-test.cc @@ -67,7 +67,7 @@ class FileTestFixture : public ::testing::Test { void 
EnsureFileDeleted() { if (FileExists(path_)) { - std::remove(path_.c_str()); + ARROW_UNUSED(std::remove(path_.c_str())); } } @@ -105,7 +105,8 @@ class TestBufferedOutputStream : public FileTestFixture { lseek(fd_, 0, SEEK_END); #endif } - ASSERT_OK(BufferedOutputStream::Create(file, buffer_size, &buffered_)); + ASSERT_OK(BufferedOutputStream::Create(buffer_size, default_memory_pool(), file, + &buffered_)); } void WriteChunkwise(const std::string& datastr, const std::valarray& sizes) { @@ -301,7 +302,7 @@ TEST_F(TestBufferedOutputStream, TruncatesFile) { // ---------------------------------------------------------------------- // BufferedInputStream tests -const char kExample1[] = ("informaticacrobaticsimmolation"); +const char kExample1[] = "informaticacrobaticsimmolation"; class TestBufferedInputStream : public FileTestFixture { public: @@ -321,7 +322,7 @@ class TestBufferedInputStream : public FileTestFixture { std::shared_ptr file_in; ASSERT_OK(ReadableFile::Open(path_, &file_in)); raw_ = file_in; - ASSERT_OK(BufferedInputStream::Create(raw_, buffer_size, pool, &buffered_)); + ASSERT_OK(BufferedInputStream::Create(buffer_size, pool, raw_, &buffered_)); } protected: diff --git a/cpp/src/arrow/io/buffered.cc b/cpp/src/arrow/io/buffered.cc index 0c04ac21c208e..0b1431f440fa2 100644 --- a/cpp/src/arrow/io/buffered.cc +++ b/cpp/src/arrow/io/buffered.cc @@ -21,10 +21,10 @@ #include #include #include -#include #include #include "arrow/buffer.h" +#include "arrow/memory_pool.h" #include "arrow/status.h" #include "arrow/util/logging.h" #include "arrow/util/string_view.h" @@ -91,8 +91,8 @@ class BufferedBase { class BufferedOutputStream::Impl : public BufferedBase { public: - explicit Impl(std::shared_ptr raw) - : BufferedBase(default_memory_pool()), raw_(std::move(raw)) {} + explicit Impl(std::shared_ptr raw, MemoryPool* pool) + : BufferedBase(pool), raw_(std::move(raw)) {} Status Close() { std::lock_guard guard(lock_); @@ -173,14 +173,16 @@ class BufferedOutputStream::Impl : public BufferedBase { std::shared_ptr raw_; }; -BufferedOutputStream::BufferedOutputStream(std::shared_ptr raw) - : impl_(new BufferedOutputStream::Impl(std::move(raw))) {} +BufferedOutputStream::BufferedOutputStream(std::shared_ptr raw, + MemoryPool* pool) { + impl_.reset(new Impl(std::move(raw), pool)); +} -Status BufferedOutputStream::Create(std::shared_ptr raw, - int64_t buffer_size, +Status BufferedOutputStream::Create(int64_t buffer_size, MemoryPool* pool, + std::shared_ptr raw, std::shared_ptr* out) { - auto result = - std::shared_ptr(new BufferedOutputStream(std::move(raw))); + auto result = std::shared_ptr( + new BufferedOutputStream(std::move(raw), pool)); RETURN_NOT_OK(result->SetBufferSize(buffer_size)); *out = std::move(result); return Status::OK(); @@ -217,12 +219,12 @@ std::shared_ptr BufferedOutputStream::raw() const { return impl_-> // ---------------------------------------------------------------------- // BufferedInputStream implementation -class BufferedInputStream::BufferedInputStreamImpl : public BufferedBase { +class BufferedInputStream::Impl : public BufferedBase { public: - BufferedInputStreamImpl(std::shared_ptr raw, MemoryPool* pool) + Impl(std::shared_ptr raw, MemoryPool* pool) : BufferedBase(pool), raw_(std::move(raw)), bytes_buffered_(0) {} - ~BufferedInputStreamImpl() { DCHECK_OK(Close()); } + ~Impl() { DCHECK_OK(Close()); } Status Close() { std::lock_guard guard(lock_); @@ -350,13 +352,13 @@ class BufferedInputStream::BufferedInputStreamImpl : public BufferedBase { 
BufferedInputStream::BufferedInputStream(std::shared_ptr raw, MemoryPool* pool) { - impl_.reset(new BufferedInputStreamImpl(std::move(raw), pool)); + impl_.reset(new Impl(std::move(raw), pool)); } BufferedInputStream::~BufferedInputStream() { DCHECK_OK(impl_->Close()); } -Status BufferedInputStream::Create(std::shared_ptr raw, int64_t buffer_size, - MemoryPool* pool, +Status BufferedInputStream::Create(int64_t buffer_size, MemoryPool* pool, + std::shared_ptr raw, std::shared_ptr* out) { auto result = std::shared_ptr(new BufferedInputStream(std::move(raw), pool)); diff --git a/cpp/src/arrow/io/buffered.h b/cpp/src/arrow/io/buffered.h index e7302589dd650..945915bfe998f 100644 --- a/cpp/src/arrow/io/buffered.h +++ b/cpp/src/arrow/io/buffered.h @@ -29,6 +29,7 @@ namespace arrow { +class Buffer; class MemoryPool; class Status; @@ -39,12 +40,13 @@ class ARROW_EXPORT BufferedOutputStream : public OutputStream { ~BufferedOutputStream() override; /// \brief Create a buffered output stream wrapping the given output stream. + /// \param[in] buffer_size the size of the temporary write buffer + /// \param[in] pool a MemoryPool to use for allocations /// \param[in] raw another OutputStream - /// \param[in] buffer_size the size of the temporary buffer. Allocates from - /// the default memory pool /// \param[out] out the created BufferedOutputStream /// \return Status - static Status Create(std::shared_ptr raw, int64_t buffer_size, + static Status Create(int64_t buffer_size, MemoryPool* pool, + std::shared_ptr raw, std::shared_ptr* out); /// \brief Resize internal buffer @@ -78,7 +80,7 @@ class ARROW_EXPORT BufferedOutputStream : public OutputStream { std::shared_ptr raw() const; private: - explicit BufferedOutputStream(std::shared_ptr raw); + explicit BufferedOutputStream(std::shared_ptr raw, MemoryPool* pool); class ARROW_NO_EXPORT Impl; std::unique_ptr impl_; @@ -93,16 +95,13 @@ class ARROW_EXPORT BufferedInputStream : public InputStream { ~BufferedInputStream() override; /// \brief Create a BufferedInputStream from a raw InputStream - /// \param[in] raw a raw InputStream /// \param[in] buffer_size the size of the temporary read buffer /// \param[in] pool a MemoryPool to use for allocations + /// \param[in] raw a raw InputStream /// \param[out] out the created BufferedInputStream - static Status Create(std::shared_ptr raw, int64_t buffer_size, - MemoryPool* pool, std::shared_ptr* out); - - /// \brief Return string_view to buffered bytes, up to the indicated - /// number. View becomes invalid after any operation on file - util::string_view Peek(int64_t nbytes) const; + static Status Create(int64_t buffer_size, MemoryPool* pool, + std::shared_ptr raw, + std::shared_ptr* out); /// \brief Resize internal read buffer; calls to Read(...) 
will read at least /// \param[in] new_buffer_size the new read buffer size @@ -124,6 +123,7 @@ class ARROW_EXPORT BufferedInputStream : public InputStream { std::shared_ptr raw() const; // InputStream APIs + util::string_view Peek(int64_t nbytes) const override; Status Close() override; bool closed() const override; @@ -140,8 +140,8 @@ class ARROW_EXPORT BufferedInputStream : public InputStream { private: explicit BufferedInputStream(std::shared_ptr raw, MemoryPool* pool); - class ARROW_NO_EXPORT BufferedInputStreamImpl; - std::unique_ptr impl_; + class ARROW_NO_EXPORT Impl; + std::unique_ptr impl_; }; } // namespace io diff --git a/cpp/src/arrow/io/io-compressed-test.cc b/cpp/src/arrow/io/compressed-test.cc similarity index 98% rename from cpp/src/arrow/io/io-compressed-test.cc rename to cpp/src/arrow/io/compressed-test.cc index 4a3b32333eb4f..ce6533ca2604c 100644 --- a/cpp/src/arrow/io/io-compressed-test.cc +++ b/cpp/src/arrow/io/compressed-test.cc @@ -73,7 +73,7 @@ std::shared_ptr CompressDataOneShot(Codec* codec, ABORT_NOT_OK(codec->Compress(data.size(), data.data(), max_compressed_len, compressed->mutable_data(), &compressed_len)); ABORT_NOT_OK(compressed->Resize(compressed_len)); - return compressed; + return std::move(compressed); } Status RunCompressedInputStream(Codec* codec, std::shared_ptr compressed, @@ -180,7 +180,7 @@ TEST_P(CompressedInputStreamTest, TruncatedData) { TEST_P(CompressedInputStreamTest, InvalidData) { auto codec = MakeCodec(); - auto compressed_data = MakeRandomData(10000); + auto compressed_data = MakeRandomData(100); auto buffer_reader = std::make_shared(Buffer::Wrap(compressed_data)); std::shared_ptr stream; @@ -199,12 +199,14 @@ TEST_P(CompressedInputStreamTest, InvalidData) { INSTANTIATE_TEST_CASE_P(TestGZipInputStream, CompressedInputStreamTest, ::testing::Values(Compression::GZIP)); -INSTANTIATE_TEST_CASE_P(TestZSTDInputStream, CompressedInputStreamTest, - ::testing::Values(Compression::ZSTD)); - INSTANTIATE_TEST_CASE_P(TestBrotliInputStream, CompressedInputStreamTest, ::testing::Values(Compression::BROTLI)); +#ifdef ARROW_WITH_ZSTD +INSTANTIATE_TEST_CASE_P(TestZSTDInputStream, CompressedInputStreamTest, + ::testing::Values(Compression::ZSTD)); +#endif + class CompressedOutputStreamTest : public ::testing::TestWithParam { protected: Compression::type GetCompression() { return GetParam(); } @@ -235,11 +237,13 @@ TEST_P(CompressedOutputStreamTest, RandomData) { INSTANTIATE_TEST_CASE_P(TestGZipOutputStream, CompressedOutputStreamTest, ::testing::Values(Compression::GZIP)); -INSTANTIATE_TEST_CASE_P(TestZSTDOutputStream, CompressedOutputStreamTest, - ::testing::Values(Compression::ZSTD)); - INSTANTIATE_TEST_CASE_P(TestBrotliOutputStream, CompressedOutputStreamTest, ::testing::Values(Compression::BROTLI)); +#ifdef ARROW_WITH_ZSTD +INSTANTIATE_TEST_CASE_P(TestZSTDOutputStream, CompressedOutputStreamTest, + ::testing::Values(Compression::ZSTD)); +#endif + } // namespace io } // namespace arrow diff --git a/cpp/src/arrow/io/compressed.cc b/cpp/src/arrow/io/compressed.cc index e5fd6b4adf4c7..1311dbc246634 100644 --- a/cpp/src/arrow/io/compressed.cc +++ b/cpp/src/arrow/io/compressed.cc @@ -44,7 +44,7 @@ namespace io { class CompressedOutputStream::Impl { public: Impl(MemoryPool* pool, Codec* codec, const std::shared_ptr& raw) - : pool_(pool), raw_(raw), codec_(codec), is_open_(true) {} + : pool_(pool), raw_(raw), codec_(codec), is_open_(true), compressed_pos_(0) {} ~Impl() { DCHECK(Close().ok()); } diff --git a/cpp/src/arrow/io/io-file-benchmark.cc 
b/cpp/src/arrow/io/file-benchmark.cc similarity index 93% rename from cpp/src/arrow/io/io-file-benchmark.cc rename to cpp/src/arrow/io/file-benchmark.cc index c57fa6d605d68..3e99ba077acb3 100644 --- a/cpp/src/arrow/io/io-file-benchmark.cc +++ b/cpp/src/arrow/io/file-benchmark.cc @@ -30,12 +30,18 @@ #include #include +#ifndef _WIN32 + #include #include #include +#endif + namespace arrow { +#ifndef _WIN32 + std::string GetNullFile() { return "/dev/null"; } const std::valarray small_sizes = {8, 24, 33, 1, 32, 192, 16, 40}; @@ -163,7 +169,8 @@ static void BM_BufferedOutputStreamSmallWritesToNull( ABORT_NOT_OK(io::FileOutputStream::Open(GetNullFile(), &file)); std::shared_ptr buffered_file; - ABORT_NOT_OK(io::BufferedOutputStream::Create(file, kBufferSize, &buffered_file)); + ABORT_NOT_OK(io::BufferedOutputStream::Create(kBufferSize, default_memory_pool(), file, + &buffered_file)); BenchmarkStreamingWrites(state, small_sizes, buffered_file.get()); } @@ -196,7 +203,8 @@ static void BM_BufferedOutputStreamSmallWritesToPipe( SetupPipeWriter(&stream, &reader); std::shared_ptr buffered_stream; - ABORT_NOT_OK(io::BufferedOutputStream::Create(stream, kBufferSize, &buffered_stream)); + ABORT_NOT_OK(io::BufferedOutputStream::Create(kBufferSize, default_memory_pool(), + stream, &buffered_stream)); BenchmarkStreamingWrites(state, small_sizes, buffered_stream.get(), reader.get()); } @@ -207,7 +215,8 @@ static void BM_BufferedOutputStreamLargeWritesToPipe( SetupPipeWriter(&stream, &reader); std::shared_ptr buffered_stream; - ABORT_NOT_OK(io::BufferedOutputStream::Create(stream, kBufferSize, &buffered_stream)); + ABORT_NOT_OK(io::BufferedOutputStream::Create(kBufferSize, default_memory_pool(), + stream, &buffered_stream)); BenchmarkStreamingWrites(state, large_sizes, buffered_stream.get(), reader.get()); } @@ -241,4 +250,6 @@ BENCHMARK(BM_BufferedOutputStreamLargeWritesToPipe) ->MinTime(1.0) ->UseRealTime(); +#endif // ifndef _WIN32 + } // namespace arrow diff --git a/cpp/src/arrow/io/io-file-test.cc b/cpp/src/arrow/io/file-test.cc similarity index 98% rename from cpp/src/arrow/io/io-file-test.cc rename to cpp/src/arrow/io/file-test.cc index afe2c60718b0e..f329ae9d504e5 100644 --- a/cpp/src/arrow/io/io-file-test.cc +++ b/cpp/src/arrow/io/file-test.cc @@ -56,7 +56,7 @@ class FileTestFixture : public ::testing::Test { void EnsureFileDeleted() { if (FileExists(path_)) { - std::remove(path_.c_str()); + ARROW_UNUSED(std::remove(path_.c_str())); } } @@ -345,6 +345,15 @@ TEST_F(TestReadableFile, FromFileDescriptor) { ASSERT_TRUE(FileIsClosed(fd)); } +TEST_F(TestReadableFile, Peek) { + MakeTestFile(); + OpenFile(); + + // Cannot peek + auto view = file_->Peek(4); + ASSERT_EQ(0, view.size()); +} + TEST_F(TestReadableFile, SeekTellSize) { MakeTestFile(); OpenFile(); @@ -451,9 +460,7 @@ class MyMemoryPool : public MemoryPool { *ptr = reinterpret_cast(std::realloc(*ptr, new_size)); if (*ptr == NULL) { - std::stringstream ss; - ss << "realloc of size " << new_size << " failed"; - return Status::OutOfMemory(ss.str()); + return Status::OutOfMemory("realloc of size ", new_size, " failed"); } return Status::OK(); @@ -461,10 +468,10 @@ class MyMemoryPool : public MemoryPool { int64_t bytes_allocated() const override { return -1; } - int64_t num_allocations() const { return num_allocations_; } + int64_t num_allocations() const { return num_allocations_.load(); } private: - int64_t num_allocations_; + std::atomic num_allocations_; }; TEST_F(TestReadableFile, CustomMemoryPool) { diff --git a/cpp/src/arrow/io/file.cc 
b/cpp/src/arrow/io/file.cc index 869d8e3720766..0398d5a1f9e80 100644 --- a/cpp/src/arrow/io/file.cc +++ b/cpp/src/arrow/io/file.cc @@ -479,9 +479,7 @@ class MemoryMappedFile::MemoryMap : public MutableBuffer { void* result = mmap(nullptr, static_cast(initial_size), prot_flags_, map_mode_, file_->fd(), 0); if (result == MAP_FAILED) { - std::stringstream ss; - ss << "Memory mapping file failed: " << std::strerror(errno); - return Status::IOError(ss.str()); + return Status::IOError("Memory mapping file failed: ", std::strerror(errno)); } size_ = capacity_ = initial_size; data_ = mutable_data_ = static_cast(result); diff --git a/cpp/src/arrow/io/hdfs-internal.cc b/cpp/src/arrow/io/hdfs-internal.cc index c8be5164cfa78..c273ab45f634f 100644 --- a/cpp/src/arrow/io/hdfs-internal.cc +++ b/cpp/src/arrow/io/hdfs-internal.cc @@ -218,9 +218,7 @@ static arrow::Status try_dlopen(std::vector potential_paths, const cha } if (out_handle == NULL) { - std::stringstream ss; - ss << "Unable to load " << name; - return arrow::Status::IOError(ss.str()); + return arrow::Status::IOError("Unable to load ", name); } return arrow::Status::OK(); @@ -243,9 +241,7 @@ static arrow::Status try_dlopen(std::vector potential_paths, const cha } if (out_handle == NULL) { - std::stringstream ss; - ss << "Unable to load " << name; - return arrow::Status::IOError(ss.str()); + return arrow::Status::IOError("Unable to load ", name); } return arrow::Status::OK(); diff --git a/cpp/src/arrow/io/io-hdfs-test.cc b/cpp/src/arrow/io/hdfs-test.cc similarity index 96% rename from cpp/src/arrow/io/io-hdfs-test.cc rename to cpp/src/arrow/io/hdfs-test.cc index c853b2012666e..08a7e13a1f8a2 100644 --- a/cpp/src/arrow/io/io-hdfs-test.cc +++ b/cpp/src/arrow/io/hdfs-test.cc @@ -257,6 +257,23 @@ TYPED_TEST(TestHadoopFileSystem, GetPathInfo) { ASSERT_EQ(size, info.size); } +TYPED_TEST(TestHadoopFileSystem, GetPathInfoNotExist) { + // ARROW-2919: Test that the error message is reasonable + SKIP_IF_NO_DRIVER(); + + ASSERT_OK(this->MakeScratchDir()); + auto path = this->ScratchPath("path-does-not-exist"); + + HdfsPathInfo info; + Status s = this->client_->GetPathInfo(path, &info); + ASSERT_TRUE(s.IsIOError()); + + const std::string error_message = s.ToString(); + + // Check that the file path is found in the error message + ASSERT_LT(error_message.find(path), std::string::npos); +} + TYPED_TEST(TestHadoopFileSystem, AppendToFile) { SKIP_IF_NO_DRIVER(); @@ -377,6 +394,8 @@ TYPED_TEST(TestHadoopFileSystem, LargeFile) { std::shared_ptr file; ASSERT_OK(this->client_->OpenReadable(path, &file)); + ASSERT_FALSE(file->closed()); + std::shared_ptr buffer; ASSERT_OK(AllocateBuffer(nullptr, size, &buffer)); diff --git a/cpp/src/arrow/io/hdfs.cc b/cpp/src/arrow/io/hdfs.cc index 6f01f75eec3c1..0a50d3dcdcd90 100644 --- a/cpp/src/arrow/io/hdfs.cc +++ b/cpp/src/arrow/io/hdfs.cc @@ -43,14 +43,25 @@ using std::size_t; namespace arrow { namespace io { -#define CHECK_FAILURE(RETURN_VALUE, WHAT) \ - do { \ - if (RETURN_VALUE == -1) { \ - std::stringstream ss; \ - ss << "HDFS " << WHAT << " failed, errno: " << errno << " (" << strerror(errno) \ - << ")"; \ - return Status::IOError(ss.str()); \ - } \ +namespace { + +std::string TranslateErrno(int error_code) { + std::stringstream ss; + ss << error_code << " (" << strerror(error_code) << ")"; + if (error_code == 255) { + // Unknown error can occur if the host is correct but the port is not + ss << " Please check that you are connecting to the correct HDFS RPC port"; + } + return ss.str(); +} + +} // namespace + +#define 
CHECK_FAILURE(RETURN_VALUE, WHAT) \ + do { \ + if (RETURN_VALUE == -1) { \ + return Status::IOError("HDFS ", WHAT, " failed, errno: ", TranslateErrno(errno)); \ + } \ } while (0) static constexpr int kDefaultHdfsBufferSize = 1 << 16; @@ -99,6 +110,16 @@ class HdfsAnyFileImpl { bool is_open_; }; +namespace { + +Status GetPathInfoFailed(const std::string& path) { + std::stringstream ss; + ss << "Calling GetPathInfo for " << path << " failed. errno: " << TranslateErrno(errno); + return Status::IOError(ss.str()); +} + +} // namespace + // Private implementation for read-only files class HdfsReadableFile::HdfsReadableFileImpl : public HdfsAnyFileImpl { public: @@ -180,7 +201,7 @@ class HdfsReadableFile::HdfsReadableFileImpl : public HdfsAnyFileImpl { Status GetSize(int64_t* size) { hdfsFileInfo* entry = driver_->GetPathInfo(fs_, path_.c_str()); if (entry == nullptr) { - return Status::IOError("HDFS: GetPathInfo failed"); + return GetPathInfoFailed(path_); } *size = entry->mSize; @@ -204,7 +225,7 @@ HdfsReadableFile::HdfsReadableFile(MemoryPool* pool) { impl_.reset(new HdfsReadableFileImpl(pool)); } -HdfsReadableFile::~HdfsReadableFile() { DCHECK(impl_->Close().ok()); } +HdfsReadableFile::~HdfsReadableFile() { DCHECK_OK(impl_->Close()); } Status HdfsReadableFile::Close() { return impl_->Close(); } @@ -272,7 +293,7 @@ class HdfsOutputStream::HdfsOutputStreamImpl : public HdfsAnyFileImpl { HdfsOutputStream::HdfsOutputStream() { impl_.reset(new HdfsOutputStreamImpl()); } -HdfsOutputStream::~HdfsOutputStream() { DCHECK(impl_->Close().ok()); } +HdfsOutputStream::~HdfsOutputStream() { DCHECK_OK(impl_->Close()); } Status HdfsOutputStream::Close() { return impl_->Close(); } @@ -315,7 +336,7 @@ static void SetPathInfo(const hdfsFileInfo* input, HdfsPathInfo* out) { // Private implementation class HadoopFileSystem::HadoopFileSystemImpl { public: - HadoopFileSystemImpl() {} + HadoopFileSystemImpl() : driver_(NULLPTR), port_(0), fs_(NULLPTR) {} Status Connect(const HdfsConnectionConfig* config) { if (config->driver == HdfsDriver::LIBHDFS3) { @@ -399,7 +420,7 @@ class HadoopFileSystem::HadoopFileSystemImpl { hdfsFileInfo* entry = driver_->GetPathInfo(fs_, path.c_str()); if (entry == nullptr) { - return Status::IOError("HDFS: GetPathInfo failed"); + return GetPathInfoFailed(path); } SetPathInfo(entry, info); @@ -443,10 +464,8 @@ class HadoopFileSystem::HadoopFileSystemImpl { if ((errno == 0) || (errno == ENOENT && Exists(path))) { num_entries = 0; } else { - std::stringstream ss; - ss << "HDFS list directory failed, errno: " << errno << " (" << strerror(errno) - << ")"; - return Status::IOError(ss.str()); + return Status::IOError("HDFS list directory failed, errno: ", + TranslateErrno(errno)); } } @@ -469,14 +488,9 @@ class HadoopFileSystem::HadoopFileSystemImpl { hdfsFile handle = driver_->OpenFile(fs_, path.c_str(), O_RDONLY, buffer_size, 0, 0); if (handle == nullptr) { - std::stringstream ss; - if (!Exists(path)) { - ss << "HDFS file does not exist: " << path; - } else { - // TODO(wesm): determine other causes of failure - ss << "HDFS path exists, but opening file failed: " << path; - } - return Status::IOError(ss.str()); + const char* msg = !Exists(path) ? 
"HDFS file does not exist: " + : "HDFS path exists, but opening file failed: "; + return Status::IOError(msg, path); } // std::make_shared does not work with private ctors @@ -498,10 +512,7 @@ class HadoopFileSystem::HadoopFileSystemImpl { static_cast(default_block_size)); if (handle == nullptr) { - // TODO(wesm): determine cause of failure - std::stringstream ss; - ss << "Unable to open file " << path; - return Status::IOError(ss.str()); + return Status::IOError("Unable to open file ", path); } // std::make_shared does not work with private ctors diff --git a/cpp/src/arrow/io/interfaces.cc b/cpp/src/arrow/io/interfaces.cc index ccabd475997a7..94e8fe6f43f0d 100644 --- a/cpp/src/arrow/io/interfaces.cc +++ b/cpp/src/arrow/io/interfaces.cc @@ -22,6 +22,7 @@ #include #include "arrow/status.h" +#include "arrow/util/string_view.h" namespace arrow { namespace io { @@ -33,6 +34,10 @@ Status InputStream::Advance(int64_t nbytes) { return Read(nbytes, &temp); } +util::string_view InputStream::Peek(int64_t ARROW_ARG_UNUSED(nbytes)) const { + return util::string_view(nullptr, 0); +} + bool InputStream::supports_zero_copy() const { return false; } struct RandomAccessFile::RandomAccessFileImpl { diff --git a/cpp/src/arrow/io/interfaces.h b/cpp/src/arrow/io/interfaces.h index b6ba59bd247f9..7104affaed77c 100644 --- a/cpp/src/arrow/io/interfaces.h +++ b/cpp/src/arrow/io/interfaces.h @@ -24,6 +24,7 @@ #include #include "arrow/util/macros.h" +#include "arrow/util/string_view.h" #include "arrow/util/visibility.h" namespace arrow { @@ -121,6 +122,13 @@ class ARROW_EXPORT InputStream : virtual public FileInterface, virtual public Re /// \return Status Status Advance(int64_t nbytes); + /// \brief Return string_view to any buffered bytes, up to the indicated + /// number. View becomes invalid after any operation on file. If the + /// InputStream is unbuffered, returns 0-length string_view + /// \param[in] nbytes the maximum number of bytes to see + /// \return arrow::util::string_view + virtual util::string_view Peek(int64_t nbytes) const; + /// \brief Return true if InputStream is capable of zero copy Buffer reads virtual bool supports_zero_copy() const; diff --git a/cpp/src/arrow/io/io-memory-benchmark.cc b/cpp/src/arrow/io/io-memory-benchmark.cc deleted file mode 100644 index 72a5dc8ac2a7f..0000000000000 --- a/cpp/src/arrow/io/io-memory-benchmark.cc +++ /dev/null @@ -1,64 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "arrow/api.h" -#include "arrow/io/memory.h" -#include "arrow/test-util.h" - -#include "benchmark/benchmark.h" - -#include - -namespace arrow { - -static void BM_SerialMemcopy(benchmark::State& state) { // NOLINT non-const reference - constexpr int64_t kTotalSize = 100 * 1024 * 1024; // 100MB - - std::shared_ptr buffer1, buffer2; - ABORT_NOT_OK(AllocateBuffer(kTotalSize, &buffer1)); - ABORT_NOT_OK(AllocateBuffer(kTotalSize, &buffer2)); - random_bytes(kTotalSize, 0, buffer2->mutable_data()); - - while (state.KeepRunning()) { - io::FixedSizeBufferWriter writer(buffer1); - ABORT_NOT_OK(writer.Write(buffer2->data(), buffer2->size())); - } - state.SetBytesProcessed(int64_t(state.iterations()) * kTotalSize); -} - -static void BM_ParallelMemcopy(benchmark::State& state) { // NOLINT non-const reference - constexpr int64_t kTotalSize = 100 * 1024 * 1024; // 100MB - - std::shared_ptr buffer1, buffer2; - ABORT_NOT_OK(AllocateBuffer(kTotalSize, &buffer1)); - ABORT_NOT_OK(AllocateBuffer(kTotalSize, &buffer2)); - - random_bytes(kTotalSize, 0, buffer2->mutable_data()); - - while (state.KeepRunning()) { - io::FixedSizeBufferWriter writer(buffer1); - writer.set_memcopy_threads(4); - ABORT_NOT_OK(writer.Write(buffer2->data(), buffer2->size())); - } - state.SetBytesProcessed(int64_t(state.iterations()) * kTotalSize); -} - -BENCHMARK(BM_SerialMemcopy)->MinTime(1.0)->Repetitions(2)->UseRealTime(); - -BENCHMARK(BM_ParallelMemcopy)->MinTime(1.0)->Repetitions(2)->UseRealTime(); - -} // namespace arrow diff --git a/cpp/src/arrow/io/memory-benchmark.cc b/cpp/src/arrow/io/memory-benchmark.cc new file mode 100644 index 0000000000000..b36be4de1639c --- /dev/null +++ b/cpp/src/arrow/io/memory-benchmark.cc @@ -0,0 +1,115 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifdef _MSC_VER +#include +#else +#include +#endif + +#include + +#include "arrow/api.h" +#include "arrow/io/memory.h" +#include "arrow/test-util.h" +#include "arrow/util/cpu-info.h" + +#include "benchmark/benchmark.h" + +namespace arrow { + +static const int kNumCores = internal::CpuInfo::GetInstance()->num_cores(); +constexpr size_t kMemoryPerCore = 32 * 1024 * 1024; +using BufferPtr = std::shared_ptr; + +using VectorType = __m128i; + +// See http://codearcana.com/posts/2013/05/18/achieving-maximum-memory-bandwidth.html +// for the usage of stream loads/writes. Or section 6.1, page 47 of +// https://akkadia.org/drepper/cpumemory.pdf . 
+ +static void Read(void* src, void* dst, size_t size) { + auto simd = static_cast(src); + (void)dst; + + for (size_t i = 0; i < size / sizeof(VectorType); i++) + benchmark::DoNotOptimize(_mm_stream_load_si128(&simd[i])); +} + +static void Write(void* src, void* dst, size_t size) { + auto simd = static_cast(dst); + const VectorType ones = _mm_set1_epi32(1); + (void)src; + + for (size_t i = 0; i < size / sizeof(VectorType); i++) _mm_stream_si128(&simd[i], ones); +} + +static void ReadWrite(void* src, void* dst, size_t size) { + auto src_simd = static_cast(src); + auto dst_simd = static_cast(dst); + + for (size_t i = 0; i < size / sizeof(VectorType); i++) + _mm_stream_si128(&dst_simd[i], _mm_stream_load_si128(&src_simd[i])); +} + +using ApplyFn = decltype(Read); + +template +static void MemoryBandwidth(benchmark::State& state) { // NOLINT non-const reference + const size_t buffer_size = kMemoryPerCore; + BufferPtr src, dst; + + ABORT_NOT_OK(AllocateBuffer(buffer_size, &src)); + ABORT_NOT_OK(AllocateBuffer(buffer_size, &dst)); + random_bytes(buffer_size, 0, src->mutable_data()); + + while (state.KeepRunning()) { + Apply(src->mutable_data(), dst->mutable_data(), buffer_size); + } + + state.SetBytesProcessed(state.iterations() * buffer_size); +} + +// `UseRealTime` is required due to threads, otherwise the cumulative CPU time +// is used which will skew the results by the number of threads. +BENCHMARK_TEMPLATE(MemoryBandwidth, Read)->ThreadRange(1, kNumCores)->UseRealTime(); +BENCHMARK_TEMPLATE(MemoryBandwidth, Write)->ThreadRange(1, kNumCores)->UseRealTime(); +BENCHMARK_TEMPLATE(MemoryBandwidth, ReadWrite)->ThreadRange(1, kNumCores)->UseRealTime(); + +static void ParallelMemoryCopy(benchmark::State& state) { // NOLINT non-const reference + const int64_t n_threads = state.range(0); + const int64_t buffer_size = kMemoryPerCore; + + std::shared_ptr src, dst; + ABORT_NOT_OK(AllocateBuffer(buffer_size, &src)); + ABORT_NOT_OK(AllocateBuffer(buffer_size, &dst)); + + random_bytes(buffer_size, 0, src->mutable_data()); + + while (state.KeepRunning()) { + io::FixedSizeBufferWriter writer(dst); + writer.set_memcopy_threads(static_cast(n_threads)); + ABORT_NOT_OK(writer.Write(src->data(), src->size())); + } + + state.SetBytesProcessed(int64_t(state.iterations()) * buffer_size); + state.counters["threads"] = static_cast(n_threads); +} + +BENCHMARK(ParallelMemoryCopy)->RangeMultiplier(2)->Range(1, kNumCores)->UseRealTime(); + +} // namespace arrow diff --git a/cpp/src/arrow/io/io-memory-test.cc b/cpp/src/arrow/io/memory-test.cc similarity index 86% rename from cpp/src/arrow/io/io-memory-test.cc rename to cpp/src/arrow/io/memory-test.cc index fa90c1f141bd3..ecd920b854c69 100644 --- a/cpp/src/arrow/io/io-memory-test.cc +++ b/cpp/src/arrow/io/memory-test.cc @@ -139,11 +139,29 @@ TEST(TestFixedSizeBufferWriter, Basics) { ASSERT_OK(writer.Close()); } +TEST(TestBufferReader, FromStrings) { + // ARROW-3291: construct BufferReader from std::string or + // arrow::util::string_view + + std::string data = "data123456"; + auto view = util::string_view(data); + + BufferReader reader1(data); + BufferReader reader2(view); + + std::shared_ptr piece; + ASSERT_OK(reader1.Read(4, &piece)); + ASSERT_EQ(0, memcmp(piece->data(), data.data(), 4)); + + ASSERT_OK(reader2.Seek(2)); + ASSERT_OK(reader2.Read(4, &piece)); + ASSERT_EQ(0, memcmp(piece->data(), data.data() + 2, 4)); +} + TEST(TestBufferReader, Seeking) { std::string data = "data123456"; - auto buffer = std::make_shared(data); - BufferReader reader(buffer); + BufferReader 
reader(data);
 
   int64_t pos;
   ASSERT_OK(reader.Tell(&pos));
   ASSERT_EQ(pos, 0);
@@ -161,6 +179,21 @@ TEST(TestBufferReader, Seeking) {
   ASSERT_EQ(pos, 10);
 }
 
+TEST(TestBufferReader, Peek) {
+  std::string data = "data123456";
+
+  BufferReader reader(std::make_shared<Buffer>(data));
+
+  auto view = reader.Peek(4);
+
+  ASSERT_EQ(4, view.size());
+  ASSERT_EQ(data.substr(0, 4), view.to_string());
+
+  view = reader.Peek(20);
+  ASSERT_EQ(data.size(), view.size());
+  ASSERT_EQ(data, view.to_string());
+}
+
 TEST(TestBufferReader, RetainParentReference) {
   // ARROW-387
   std::string data = "data123456";
diff --git a/cpp/src/arrow/io/memory.cc b/cpp/src/arrow/io/memory.cc
index 8a79f6bb94dfd..6afafbc25dc04 100644
--- a/cpp/src/arrow/io/memory.cc
+++ b/cpp/src/arrow/io/memory.cc
@@ -287,6 +287,12 @@ Status BufferReader::Tell(int64_t* position) const {
   return Status::OK();
 }
 
+util::string_view BufferReader::Peek(int64_t nbytes) const {
+  const int64_t bytes_available = std::min(nbytes, size_ - position_);
+  return util::string_view(reinterpret_cast<const char*>(data_) + position_,
+                           static_cast<size_t>(bytes_available));
+}
+
 bool BufferReader::supports_zero_copy() const { return true; }
 
 Status BufferReader::ReadAt(int64_t position, int64_t nbytes, int64_t* bytes_read,
diff --git a/cpp/src/arrow/io/memory.h b/cpp/src/arrow/io/memory.h
index 7b29800762c8f..cf73def3decfd 100644
--- a/cpp/src/arrow/io/memory.h
+++ b/cpp/src/arrow/io/memory.h
@@ -25,6 +25,7 @@
 
 #include "arrow/io/interfaces.h"
 #include "arrow/memory_pool.h"
+#include "arrow/util/string_view.h"
 #include "arrow/util/visibility.h"
 
 namespace arrow {
@@ -133,6 +134,12 @@ class ARROW_EXPORT BufferReader : public RandomAccessFile {
   explicit BufferReader(const Buffer& buffer);
   BufferReader(const uint8_t* data, int64_t size);
 
+  /// \brief Instantiate from std::string or arrow::util::string_view.
Does not + /// own data + explicit BufferReader(const util::string_view& data) + : BufferReader(reinterpret_cast(data.data()), + static_cast(data.size())) {} + Status Close() override; bool closed() const override; Status Tell(int64_t* position) const override; @@ -140,6 +147,10 @@ class ARROW_EXPORT BufferReader : public RandomAccessFile { // Zero copy read Status Read(int64_t nbytes, std::shared_ptr* out) override; + util::string_view Peek(int64_t nbytes) const override; + + bool supports_zero_copy() const override; + Status ReadAt(int64_t position, int64_t nbytes, int64_t* bytes_read, void* out) override; Status ReadAt(int64_t position, int64_t nbytes, std::shared_ptr* out) override; @@ -147,8 +158,6 @@ class ARROW_EXPORT BufferReader : public RandomAccessFile { Status GetSize(int64_t* size) override; Status Seek(int64_t position) override; - bool supports_zero_copy() const override; - std::shared_ptr buffer() const { return buffer_; } protected: diff --git a/cpp/src/arrow/io/io-readahead-test.cc b/cpp/src/arrow/io/readahead-test.cc similarity index 81% rename from cpp/src/arrow/io/io-readahead-test.cc rename to cpp/src/arrow/io/readahead-test.cc index 1e5d02abd2f03..6575e898590d8 100644 --- a/cpp/src/arrow/io/io-readahead-test.cc +++ b/cpp/src/arrow/io/readahead-test.cc @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -45,6 +46,51 @@ using internal::checked_cast; namespace io { namespace internal { +class LockedInputStream : public InputStream { + public: + explicit LockedInputStream(const std::shared_ptr& stream) + : stream_(stream) {} + + Status Close() override { + std::lock_guard lock(mutex_); + return stream_->Close(); + } + + bool closed() const override { + std::lock_guard lock(mutex_); + return stream_->closed(); + } + + Status Tell(int64_t* position) const override { + std::lock_guard lock(mutex_); + return stream_->Tell(position); + } + + Status Read(int64_t nbytes, int64_t* bytes_read, void* buffer) override { + std::lock_guard lock(mutex_); + return stream_->Read(nbytes, bytes_read, buffer); + } + + Status Read(int64_t nbytes, std::shared_ptr* out) override { + std::lock_guard lock(mutex_); + return stream_->Read(nbytes, out); + } + + bool supports_zero_copy() const override { + std::lock_guard lock(mutex_); + return stream_->supports_zero_copy(); + } + + util::string_view Peek(int64_t nbytes) const override { + std::lock_guard lock(mutex_); + return stream_->Peek(nbytes); + } + + protected: + std::shared_ptr stream_; + mutable std::mutex mutex_; +}; + static void sleep_for(double seconds) { std::this_thread::sleep_for( std::chrono::nanoseconds(static_cast(seconds * 1e9))); @@ -57,14 +103,13 @@ static void busy_wait(double seconds, std::function predicate) { } } -std::shared_ptr DataReader(const std::string& data) { +std::shared_ptr DataReader(const std::string& data) { std::shared_ptr buffer; - ABORT_NOT_OK(AllocateBuffer(data.length(), &buffer)); - memcpy(buffer->mutable_data(), data.data(), data.length()); - return std::make_shared(std::move(buffer)); + ABORT_NOT_OK(Buffer::FromString(data, &buffer)); + return std::make_shared(std::make_shared(buffer)); } -static int64_t WaitForPosition(const RandomAccessFile& file, int64_t expected, +static int64_t WaitForPosition(const FileInterface& file, int64_t expected, double seconds = 0.2) { int64_t pos = -1; busy_wait(seconds, [&]() -> bool { @@ -74,12 +119,12 @@ static int64_t WaitForPosition(const RandomAccessFile& file, int64_t expected, return pos; } -static void 
AssertEventualPosition(const RandomAccessFile& file, int64_t expected) { +static void AssertEventualPosition(const FileInterface& file, int64_t expected) { int64_t pos = WaitForPosition(file, expected); ASSERT_EQ(pos, expected) << "File didn't reach expected position"; } -static void AssertPosition(const RandomAccessFile& file, int64_t expected) { +static void AssertPosition(const FileInterface& file, int64_t expected) { int64_t pos = -1; ABORT_NOT_OK(file.Tell(&pos)); ASSERT_EQ(pos, expected) << "File didn't reach expected position"; diff --git a/cpp/src/arrow/io/readahead.cc b/cpp/src/arrow/io/readahead.cc index 89db6a66e8c8d..4222f87a5ca3b 100644 --- a/cpp/src/arrow/io/readahead.cc +++ b/cpp/src/arrow/io/readahead.cc @@ -162,11 +162,13 @@ class ReadaheadSpooler::Impl { int64_t bytes_read; RETURN_NOT_OK(AllocateResizableBuffer( pool_, read_size_ + buf->left_padding + buf->right_padding, &buffer)); + DCHECK_NE(buffer->mutable_data(), nullptr); RETURN_NOT_OK( raw_->Read(read_size_, &bytes_read, buffer->mutable_data() + buf->left_padding)); if (bytes_read < read_size_) { // Got a short read RETURN_NOT_OK(buffer->Resize(bytes_read + buf->left_padding + buf->right_padding)); + DCHECK_NE(buffer->mutable_data(), nullptr); } // Zero padding areas memset(buffer->mutable_data(), 0, buf->left_padding); diff --git a/cpp/src/arrow/io/test-common.h b/cpp/src/arrow/io/test-common.h index fa9145259b182..d33e101175633 100644 --- a/cpp/src/arrow/io/test-common.h +++ b/cpp/src/arrow/io/test-common.h @@ -25,16 +25,11 @@ #include #include -#ifndef _MSC_VER -#include -#endif - -#if defined(__MINGW32__) // MinGW -// nothing -#elif defined(_MSC_VER) // Visual Studio +#ifdef _WIN32 +#include #include -#else // POSIX / Linux -// nothing +#else +#include #endif #include "arrow/buffer.h" @@ -64,7 +59,7 @@ static inline bool FileExists(const std::string& path) { return std::ifstream(path.c_str()).good(); } -#if defined(_MSC_VER) +#if defined(_WIN32) static inline void InvalidParamHandler(const wchar_t* expr, const wchar_t* func, const wchar_t* source_file, unsigned int source_line, uintptr_t reserved) { @@ -74,7 +69,7 @@ static inline void InvalidParamHandler(const wchar_t* expr, const wchar_t* func, #endif static inline bool FileIsClosed(int fd) { -#if defined(_MSC_VER) +#if defined(_WIN32) // Disables default behavior on wrong params which causes the application to crash // https://msdn.microsoft.com/en-us/library/ksazx244.aspx _set_invalid_parameter_handler(InvalidParamHandler); @@ -118,7 +113,7 @@ class MemoryMapFixture { public: void TearDown() { for (auto path : tmp_files_) { - std::remove(path.c_str()); + ARROW_UNUSED(std::remove(path.c_str())); } } diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt index 13ed9b9e58060..796758252979e 100644 --- a/cpp/src/arrow/ipc/CMakeLists.txt +++ b/cpp/src/arrow/ipc/CMakeLists.txt @@ -15,26 +15,34 @@ # specific language governing permissions and limitations # under the License. 
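A side note on the test-common.h hunks above: the guards widen from _MSC_VER (MSVC only) to _WIN32 (any Windows toolchain), so MinGW builds now take the same Windows path for the invalid-parameter handler and file-descriptor checks. The shape of the pattern, with illustrative header names since the bracketed include targets were lost above:

  #ifdef _WIN32
  #include <io.h>      // CRT file-descriptor helpers (illustrative)
  #else
  #include <unistd.h>  // POSIX close() and friends (illustrative)
  #endif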
+# Targets required for protocol integration testing +add_custom_target(integration) +add_dependencies(arrow-tests integration) + ####################################### # Messaging and interprocess communication ADD_ARROW_TEST(feather-test) -ADD_ARROW_TEST(ipc-read-write-test) -ADD_ARROW_TEST(ipc-json-test) +ADD_ARROW_TEST(read-write-test + PREFIX "arrow-ipc") +ADD_ARROW_TEST(json-simple-test + PREFIX "arrow-ipc") +ADD_ARROW_TEST(json-test + PREFIX "arrow-ipc") if (NOT ARROW_BOOST_HEADER_ONLY) ADD_ARROW_TEST(json-integration-test EXTRA_LINK_LIBS gflags_static) # Test is being built - if (TARGET json-integration-test) + if (TARGET arrow-json-integration-test) + add_dependencies(integration arrow-json-integration-test) if (UNIX) if (APPLE) - set_target_properties(json-integration-test + set_target_properties(arrow-json-integration-test PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") else() - target_link_libraries(json-integration-test - pthread) + target_link_libraries(arrow-json-integration-test PRIVATE pthread) endif() endif() endif() @@ -45,17 +53,17 @@ set_source_files_properties(Message_generated.h PROPERTIES GENERATED TRUE) set_source_files_properties(feather_generated.h PROPERTIES GENERATED TRUE) set_source_files_properties(File_generated.h PROPERTIES GENERATED TRUE) -set(OUTPUT_DIR ${CMAKE_BINARY_DIR}/src/arrow/ipc) +set(OUTPUT_DIR ${ARROW_BINARY_DIR}/src/arrow/ipc) set(FBS_OUTPUT_FILES "${OUTPUT_DIR}/File_generated.h" "${OUTPUT_DIR}/Message_generated.h" "${OUTPUT_DIR}/feather_generated.h") set(FBS_SRC - ${CMAKE_SOURCE_DIR}/../format/Message.fbs - ${CMAKE_SOURCE_DIR}/../format/File.fbs - ${CMAKE_SOURCE_DIR}/../format/Schema.fbs - ${CMAKE_SOURCE_DIR}/../format/Tensor.fbs + ${ARROW_SOURCE_DIR}/../format/Message.fbs + ${ARROW_SOURCE_DIR}/../format/File.fbs + ${ARROW_SOURCE_DIR}/../format/Schema.fbs + ${ARROW_SOURCE_DIR}/../format/Tensor.fbs ${CMAKE_CURRENT_SOURCE_DIR}/feather.fbs) foreach(FIL ${FBS_SRC}) @@ -80,15 +88,7 @@ add_custom_command( add_custom_target(metadata_fbs DEPENDS ${FBS_OUTPUT_FILES}) # Headers: top level -install(FILES - api.h - dictionary.h - feather.h - json.h - message.h - reader.h - writer.h - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow/ipc") +ARROW_INSTALL_ALL_HEADERS("arrow/ipc") if (ARROW_BUILD_STATIC) set(ARROW_UTIL_LIB arrow_static) @@ -109,12 +109,15 @@ if(NOT WIN32) endif() if (ARROW_BUILD_UTILITIES) - add_executable(file-to-stream file-to-stream.cc) - target_link_libraries(file-to-stream ${UTIL_LINK_LIBS}) - add_executable(stream-to-file stream-to-file.cc) - target_link_libraries(stream-to-file ${UTIL_LINK_LIBS}) -endif() + add_executable(arrow-file-to-stream file-to-stream.cc) + target_link_libraries(arrow-file-to-stream ${UTIL_LINK_LIBS}) + add_executable(arrow-stream-to-file stream-to-file.cc) + target_link_libraries(arrow-stream-to-file ${UTIL_LINK_LIBS}) -ADD_ARROW_BENCHMARK(ipc-read-write-benchmark) + add_dependencies(integration arrow-file-to-stream) + add_dependencies(integration arrow-stream-to-file) +endif() +ADD_ARROW_BENCHMARK(read-write-benchmark + PREFIX "arrow-ipc") ADD_ARROW_FUZZING(ipc-fuzzing-test) diff --git a/cpp/src/arrow/ipc/dictionary.cc b/cpp/src/arrow/ipc/dictionary.cc index 488bb75b9d75f..aa0d9085f5a8f 100644 --- a/cpp/src/arrow/ipc/dictionary.cc +++ b/cpp/src/arrow/ipc/dictionary.cc @@ -34,9 +34,7 @@ Status DictionaryMemo::GetDictionary(int64_t id, std::shared_ptr* dictionary) const { auto it = id_to_dictionary_.find(id); if (it == id_to_dictionary_.end()) { - std::stringstream ss; - ss << "Dictionary with id " << id << " 
not found"; - return Status::KeyError(ss.str()); + return Status::KeyError("Dictionary with id ", id, " not found"); } *dictionary = it->second; return Status::OK(); @@ -70,9 +68,7 @@ bool DictionaryMemo::HasDictionaryId(int64_t id) const { Status DictionaryMemo::AddDictionary(int64_t id, const std::shared_ptr& dictionary) { if (HasDictionaryId(id)) { - std::stringstream ss; - ss << "Dictionary with id " << id << " already exists"; - return Status::KeyError(ss.str()); + return Status::KeyError("Dictionary with id ", id, " already exists"); } intptr_t address = reinterpret_cast(dictionary.get()); id_to_dictionary_[id] = dictionary; diff --git a/cpp/src/arrow/ipc/feather-internal.h b/cpp/src/arrow/ipc/feather-internal.h index 90512dd117238..2aa04b2db72ba 100644 --- a/cpp/src/arrow/ipc/feather-internal.h +++ b/cpp/src/arrow/ipc/feather-internal.h @@ -119,7 +119,7 @@ class ARROW_EXPORT TableBuilder { class ARROW_EXPORT TableMetadata { public: - TableMetadata() {} + TableMetadata() : table_(NULLPTR) {} ~TableMetadata() = default; Status Open(const std::shared_ptr& buffer) { diff --git a/cpp/src/arrow/ipc/feather-test.cc b/cpp/src/arrow/ipc/feather-test.cc index d032710b0be7c..8139c47e09fca 100644 --- a/cpp/src/arrow/ipc/feather-test.cc +++ b/cpp/src/arrow/ipc/feather-test.cc @@ -30,6 +30,7 @@ #include "arrow/pretty_print.h" #include "arrow/record_batch.h" #include "arrow/status.h" +#include "arrow/table.h" #include "arrow/test-util.h" #include "arrow/type.h" #include "arrow/util/checked_cast.h" @@ -289,7 +290,7 @@ class TestTableReader : public ::testing::Test { ASSERT_OK(stream_->Finish(&output_)); - std::shared_ptr buffer(new io::BufferReader(output_)); + auto buffer = std::make_shared(output_); ASSERT_OK(TableReader::Open(buffer, &reader_)); } @@ -364,7 +365,7 @@ class TestTableWriter : public ::testing::Test { ASSERT_OK(stream_->Finish(&output_)); - std::shared_ptr buffer(new io::BufferReader(output_)); + auto buffer = std::make_shared(output_); ASSERT_OK(TableReader::Open(buffer, &reader_)); } diff --git a/cpp/src/arrow/ipc/feather.cc b/cpp/src/arrow/ipc/feather.cc index ebdb335fa57f7..d28bf7512999a 100644 --- a/cpp/src/arrow/ipc/feather.cc +++ b/cpp/src/arrow/ipc/feather.cc @@ -180,6 +180,7 @@ ColumnBuilder::ColumnBuilder(TableBuilder* parent, const std::string& name) fbb_ = &parent->fbb(); name_ = name; type_ = ColumnType::PRIMITIVE; + meta_time_.unit = TimeUnit::SECOND; } flatbuffers::Offset ColumnBuilder::CreateColumnMetadata() { @@ -642,9 +643,7 @@ class TableWriter::TableWriterImpl : public ArrayVisitor { Status LoadArrayMetadata(const Array& values, ArrayMetadata* meta) { if (!(is_primitive(values.type_id()) || is_binary_like(values.type_id()))) { - std::stringstream ss; - ss << "Array is not primitive type: " << values.type()->ToString(); - return Status::Invalid(ss.str()); + return Status::Invalid("Array is not primitive type: ", values.type()->ToString()); } meta->type = ToFlatbufferType(values.type_id()); diff --git a/cpp/src/arrow/ipc/json-integration-test.cc b/cpp/src/arrow/ipc/json-integration-test.cc index 3e71415c69654..fe69a53a944c7 100644 --- a/cpp/src/arrow/ipc/json-integration-test.cc +++ b/cpp/src/arrow/ipc/json-integration-test.cc @@ -170,10 +170,8 @@ static Status ValidateArrowVsJson(const std::string& arrow_path, const int arrow_nbatches = arrow_reader->num_record_batches(); if (json_nbatches != arrow_nbatches) { - std::stringstream ss; - ss << "Different number of record batches: " << json_nbatches << " (JSON) vs " - << arrow_nbatches << " (Arrow)"; - return 
Status::Invalid(ss.str()); + return Status::Invalid("Different number of record batches: ", json_nbatches, + " (JSON) vs ", arrow_nbatches, " (Arrow)"); } std::shared_ptr arrow_batch; @@ -231,9 +229,7 @@ Status RunCommand(const std::string& json_path, const std::string& arrow_path, return ValidateArrowVsJson(arrow_path, json_path); } else { - std::stringstream ss; - ss << "Unknown command: " << command; - return Status::Invalid(ss.str()); + return Status::Invalid("Unknown command: ", command); } } @@ -262,7 +258,7 @@ class TestJSONIntegration : public ::testing::Test { void TearDown() { for (const std::string path : tmp_paths_) { - std::remove(path.c_str()); + ARROW_UNUSED(std::remove(path.c_str())); } } diff --git a/cpp/src/arrow/ipc/json-internal.cc b/cpp/src/arrow/ipc/json-internal.cc index d5a5dd9f397db..05e547506c596 100644 --- a/cpp/src/arrow/ipc/json-internal.cc +++ b/cpp/src/arrow/ipc/json-internal.cc @@ -633,9 +633,7 @@ static Status GetInteger(const rj::Value::ConstObject& json_type, *type = is_signed ? int64() : uint64(); break; default: - std::stringstream ss; - ss << "Invalid bit width: " << bit_width; - return Status::Invalid(ss.str()); + return Status::Invalid("Invalid bit width: ", bit_width); } return Status::OK(); } @@ -654,9 +652,7 @@ static Status GetFloatingPoint(const RjObject& json_type, } else if (precision == "HALF") { *type = float16(); } else { - std::stringstream ss; - ss << "Invalid precision: " << precision; - return Status::Invalid(ss.str()); + return Status::Invalid("Invalid precision: ", precision); } return Status::OK(); } @@ -693,9 +689,7 @@ static Status GetDate(const RjObject& json_type, std::shared_ptr* type } else if (unit_str == "MILLISECOND") { *type = date64(); } else { - std::stringstream ss; - ss << "Invalid date unit: " << unit_str; - return Status::Invalid(ss.str()); + return Status::Invalid("Invalid date unit: ", unit_str); } return Status::OK(); } @@ -718,9 +712,7 @@ static Status GetTime(const RjObject& json_type, std::shared_ptr* type } else if (unit_str == "NANOSECOND") { *type = time64(TimeUnit::NANO); } else { - std::stringstream ss; - ss << "Invalid time unit: " << unit_str; - return Status::Invalid(ss.str()); + return Status::Invalid("Invalid time unit: ", unit_str); } const auto& fw_type = checked_cast(**type); @@ -749,9 +741,7 @@ static Status GetTimestamp(const RjObject& json_type, std::shared_ptr* } else if (unit_str == "NANOSECOND") { unit = TimeUnit::NANO; } else { - std::stringstream ss; - ss << "Invalid time unit: " << unit_str; - return Status::Invalid(ss.str()); + return Status::Invalid("Invalid time unit: ", unit_str); } const auto& it_tz = json_type.FindMember("timezone"); @@ -778,9 +768,7 @@ static Status GetUnion(const RjObject& json_type, } else if (mode_str == "DENSE") { mode = UnionMode::DENSE; } else { - std::stringstream ss; - ss << "Invalid union mode: " << mode_str; - return Status::Invalid(ss.str()); + return Status::Invalid("Invalid union mode: ", mode_str); } const auto& it_type_codes = json_type.FindMember("typeIds"); @@ -838,9 +826,7 @@ static Status GetType(const RjObject& json_type, } else if (type_name == "union") { return GetUnion(json_type, children, type); } else { - std::stringstream ss; - ss << "Unrecognized type name: " << type_name; - return Status::Invalid(ss.str()); + return Status::Invalid("Unrecognized type name: ", type_name); } return Status::OK(); } @@ -1235,10 +1221,8 @@ class ArrayReader { const auto& json_children_arr = json_children->value.GetArray(); if (type.num_children() != 
static_cast(json_children_arr.Size())) { - std::stringstream ss; - ss << "Expected " << type.num_children() << " children, but got " - << json_children_arr.Size(); - return Status::Invalid(ss.str()); + return Status::Invalid("Expected ", type.num_children(), " children, but got ", + json_children_arr.Size()); } for (int i = 0; i < static_cast(json_children_arr.Size()); ++i) { @@ -1342,9 +1326,7 @@ static Status ReadDictionary(const RjObject& obj, const DictionaryTypeMap& id_to auto it = id_to_field.find(id); if (it == id_to_field.end()) { - std::stringstream ss; - ss << "No dictionary with id " << id; - return Status::Invalid(ss.str()); + return Status::Invalid("No dictionary with id ", id); } std::vector> fields = {it->second}; @@ -1489,9 +1471,7 @@ Status ReadArray(MemoryPool* pool, const rj::Value& json_array, const Schema& sc } if (result == nullptr) { - std::stringstream ss; - ss << "Field named " << name << " not found in schema"; - return Status::KeyError(ss.str()); + return Status::KeyError("Field named ", name, " not found in schema"); } return ReadArray(pool, json_array, result->type(), array); diff --git a/cpp/src/arrow/ipc/json-internal.h b/cpp/src/arrow/ipc/json-internal.h index 8807a56551789..c8c724968f67c 100644 --- a/cpp/src/arrow/ipc/json-internal.h +++ b/cpp/src/arrow/ipc/json-internal.h @@ -36,6 +36,7 @@ #include "rapidjson/document.h" // IWYU pragma: export #include "rapidjson/encodings.h" // IWYU pragma: export +#include "rapidjson/error/en.h" // IWYU pragma: export #include "rapidjson/stringbuffer.h" // IWYU pragma: export #include "rapidjson/writer.h" // IWYU pragma: export @@ -48,56 +49,39 @@ using RjWriter = rj::Writer; using RjArray = rj::Value::ConstArray; using RjObject = rj::Value::ConstObject; -#define RETURN_NOT_FOUND(TOK, NAME, PARENT) \ - if (NAME == (PARENT).MemberEnd()) { \ - std::stringstream ss; \ - ss << "field " << TOK << " not found"; \ - return Status::Invalid(ss.str()); \ +#define RETURN_NOT_FOUND(TOK, NAME, PARENT) \ + if (NAME == (PARENT).MemberEnd()) { \ + return Status::Invalid("field ", TOK, " not found"); \ } -#define RETURN_NOT_STRING(TOK, NAME, PARENT) \ - RETURN_NOT_FOUND(TOK, NAME, PARENT); \ - if (!NAME->value.IsString()) { \ - std::stringstream ss; \ - ss << "field was not a string" \ - << " line " << __LINE__; \ - return Status::Invalid(ss.str()); \ +#define RETURN_NOT_STRING(TOK, NAME, PARENT) \ + RETURN_NOT_FOUND(TOK, NAME, PARENT); \ + if (!NAME->value.IsString()) { \ + return Status::Invalid("field was not a string line ", __LINE__); \ } -#define RETURN_NOT_BOOL(TOK, NAME, PARENT) \ - RETURN_NOT_FOUND(TOK, NAME, PARENT); \ - if (!NAME->value.IsBool()) { \ - std::stringstream ss; \ - ss << "field was not a boolean" \ - << " line " << __LINE__; \ - return Status::Invalid(ss.str()); \ +#define RETURN_NOT_BOOL(TOK, NAME, PARENT) \ + RETURN_NOT_FOUND(TOK, NAME, PARENT); \ + if (!NAME->value.IsBool()) { \ + return Status::Invalid("field was not a boolean line ", __LINE__); \ } -#define RETURN_NOT_INT(TOK, NAME, PARENT) \ - RETURN_NOT_FOUND(TOK, NAME, PARENT); \ - if (!NAME->value.IsInt()) { \ - std::stringstream ss; \ - ss << "field was not an int" \ - << " line " << __LINE__; \ - return Status::Invalid(ss.str()); \ +#define RETURN_NOT_INT(TOK, NAME, PARENT) \ + RETURN_NOT_FOUND(TOK, NAME, PARENT); \ + if (!NAME->value.IsInt()) { \ + return Status::Invalid("field was not an int line ", __LINE__); \ } -#define RETURN_NOT_ARRAY(TOK, NAME, PARENT) \ - RETURN_NOT_FOUND(TOK, NAME, PARENT); \ - if (!NAME->value.IsArray()) { \ - 
std::stringstream ss; \ - ss << "field was not an array" \ - << " line " << __LINE__; \ - return Status::Invalid(ss.str()); \ +#define RETURN_NOT_ARRAY(TOK, NAME, PARENT) \ + RETURN_NOT_FOUND(TOK, NAME, PARENT); \ + if (!NAME->value.IsArray()) { \ + return Status::Invalid("field was not an array line ", __LINE__); \ } -#define RETURN_NOT_OBJECT(TOK, NAME, PARENT) \ - RETURN_NOT_FOUND(TOK, NAME, PARENT); \ - if (!NAME->value.IsObject()) { \ - std::stringstream ss; \ - ss << "field was not an object" \ - << " line " << __LINE__; \ - return Status::Invalid(ss.str()); \ +#define RETURN_NOT_OBJECT(TOK, NAME, PARENT) \ + RETURN_NOT_FOUND(TOK, NAME, PARENT); \ + if (!NAME->value.IsObject()) { \ + return Status::Invalid("field was not an object line ", __LINE__); \ } namespace arrow { diff --git a/cpp/src/arrow/ipc/json-simple-test.cc b/cpp/src/arrow/ipc/json-simple-test.cc new file mode 100644 index 0000000000000..2e80a0ca85822 --- /dev/null +++ b/cpp/src/arrow/ipc/json-simple-test.cc @@ -0,0 +1,635 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "arrow/array.h" +#include "arrow/ipc/json-simple.h" +#include "arrow/test-util.h" +#include "arrow/type.h" +#include "arrow/type_traits.h" +#include "arrow/util/checked_cast.h" +#include "arrow/util/decimal.h" + +#if defined(_MSC_VER) +// "warning C4307: '+': integral constant overflow" +#pragma warning(disable : 4307) +#endif + +namespace arrow { +namespace ipc { +namespace internal { +namespace json { + +using ::arrow::internal::checked_cast; + +// Avoid undefined behaviour on signed overflow +template +Signed SafeSignedAdd(Signed u, Signed v) { + using Unsigned = typename std::make_unsigned::type; + return static_cast(static_cast(u) + static_cast(v)); +} + +// Special case for 8-bit ints (must output their decimal value, not the +// corresponding ASCII character) +void JSONArrayInternal(std::ostream* ss, int8_t value) { + *ss << static_cast(value); +} + +void JSONArrayInternal(std::ostream* ss, uint8_t value) { + *ss << static_cast(value); +} + +template +void JSONArrayInternal(std::ostream* ss, const Value& value) { + *ss << value; +} + +template +void JSONArrayInternal(std::ostream* ss, const Value& value, Tail... tail) { + JSONArrayInternal(ss, value); + *ss << ", "; + JSONArrayInternal(ss, std::forward(tail)...); +} + +template +std::string JSONArray(Args... 
args) { + std::stringstream ss; + ss << "["; + JSONArrayInternal(&ss, std::forward(args)...); + ss << "]"; + return ss.str(); +} + +template +void AssertJSONArray(const std::shared_ptr& type, const std::string& json, + const std::vector& values) { + std::shared_ptr actual, expected; + + ASSERT_OK(ArrayFromJSON(type, json, &actual)); + ASSERT_OK(ValidateArray(*actual)); + ArrayFromVector(type, values, &expected); + AssertArraysEqual(*expected, *actual); +} + +template +void AssertJSONArray(const std::shared_ptr& type, const std::string& json, + const std::vector& is_valid, + const std::vector& values) { + std::shared_ptr actual, expected; + + ASSERT_OK(ArrayFromJSON(type, json, &actual)); + ASSERT_OK(ValidateArray(*actual)); + ArrayFromVector(type, is_valid, values, &expected); + AssertArraysEqual(*expected, *actual); +} + +TEST(TestHelper, JSONArray) { + // Test the JSONArray helper func + std::string s = + JSONArray(123, -4.5, static_cast(-12), static_cast(34)); + ASSERT_EQ(s, "[123, -4.5, -12, 34]"); + s = JSONArray(9223372036854775807LL, 9223372036854775808ULL, -9223372036854775807LL - 1, + 18446744073709551615ULL); + ASSERT_EQ(s, + "[9223372036854775807, 9223372036854775808, -9223372036854775808, " + "18446744073709551615]"); +} + +TEST(TestHelper, SafeSignedAdd) { + ASSERT_EQ(0, SafeSignedAdd(-128, -128)); + ASSERT_EQ(1, SafeSignedAdd(-128, -127)); + ASSERT_EQ(-128, SafeSignedAdd(1, 127)); + ASSERT_EQ(-2147483648LL, SafeSignedAdd(1, 2147483647)); +} + +template +class TestIntegers : public ::testing::Test {}; + +TYPED_TEST_CASE_P(TestIntegers); + +TYPED_TEST_P(TestIntegers, Basics) { + using T = TypeParam; + using c_type = typename T::c_type; + + std::shared_ptr expected, actual; + std::shared_ptr type = TypeTraits::type_singleton(); + + AssertJSONArray(type, "[]", {}); + AssertJSONArray(type, "[4, 0, 5]", {4, 0, 5}); + AssertJSONArray(type, "[4, null, 5]", {true, false, true}, {4, 0, 5}); + + // Test limits + const auto min_val = std::numeric_limits::min(); + const auto max_val = std::numeric_limits::max(); + std::string json_string = JSONArray(0, 1, min_val); + AssertJSONArray(type, json_string, {0, 1, min_val}); + json_string = JSONArray(0, 1, max_val); + AssertJSONArray(type, json_string, {0, 1, max_val}); +} + +TYPED_TEST_P(TestIntegers, Errors) { + using T = TypeParam; + + std::shared_ptr array; + std::shared_ptr type = TypeTraits::type_singleton(); + + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "0", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "{}", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0.0]", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"0\"]", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0]]", &array)); +} + +TYPED_TEST_P(TestIntegers, OutOfBounds) { + using T = TypeParam; + using c_type = typename T::c_type; + + std::shared_ptr array; + std::shared_ptr type = TypeTraits::type_singleton(); + + if (type->id() == Type::UINT64) { + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[18446744073709551616]", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[-1]", &array)); + } else if (type->id() == Type::INT64) { + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[9223372036854775808]", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[-9223372036854775809]", &array)); + } else if (std::is_signed::value) { + const auto lower = SafeSignedAdd(std::numeric_limits::min(), -1); + const auto upper = 
SafeSignedAdd(std::numeric_limits::max(), +1); + auto json_string = JSONArray(lower); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, json_string, &array)); + json_string = JSONArray(upper); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, json_string, &array)); + } else { + const auto upper = static_cast(std::numeric_limits::max()) + 1; + auto json_string = JSONArray(upper); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, json_string, &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[-1]", &array)); + } +} + +REGISTER_TYPED_TEST_CASE_P(TestIntegers, Basics, Errors, OutOfBounds); + +INSTANTIATE_TYPED_TEST_CASE_P(TestInt8, TestIntegers, Int8Type); +INSTANTIATE_TYPED_TEST_CASE_P(TestInt16, TestIntegers, Int16Type); +INSTANTIATE_TYPED_TEST_CASE_P(TestInt32, TestIntegers, Int32Type); +INSTANTIATE_TYPED_TEST_CASE_P(TestInt64, TestIntegers, Int64Type); +INSTANTIATE_TYPED_TEST_CASE_P(TestUInt8, TestIntegers, UInt8Type); +INSTANTIATE_TYPED_TEST_CASE_P(TestUInt16, TestIntegers, UInt16Type); +INSTANTIATE_TYPED_TEST_CASE_P(TestUInt32, TestIntegers, UInt32Type); +INSTANTIATE_TYPED_TEST_CASE_P(TestUInt64, TestIntegers, UInt64Type); + +TEST(TestNull, Basics) { + std::shared_ptr type = null(); + std::shared_ptr expected, actual; + + AssertJSONArray(type, "[]", {}); + AssertJSONArray(type, "[null, null]", {nullptr, nullptr}); +} + +TEST(TestNull, Errors) { + std::shared_ptr type = null(); + std::shared_ptr array; + + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[]]", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0]", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[NaN]", &array)); +} + +TEST(TestBoolean, Basics) { + std::shared_ptr type = boolean(); + std::shared_ptr expected, actual; + + AssertJSONArray(type, "[]", {}); + AssertJSONArray(type, "[false, true, false]", {false, true, false}); + AssertJSONArray(type, "[false, true, null]", {true, true, false}, + {false, true, false}); +} + +TEST(TestBoolean, Errors) { + std::shared_ptr type = boolean(); + std::shared_ptr array; + + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0]", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"true\"]", &array)); +} + +TEST(TestFloat, Basics) { + std::shared_ptr type = float32(); + std::shared_ptr expected, actual; + + AssertJSONArray(type, "[]", {}); + AssertJSONArray(type, "[1, 2.5, -3e4]", {1.0f, 2.5f, -3.0e4f}); + AssertJSONArray(type, "[-0.0, Inf, -Inf, null]", {true, true, true, false}, + {-0.0f, INFINITY, -INFINITY, 0.0f}); + + // Check NaN separately as AssertArraysEqual simply memcmp's array contents + // and NaNs can have many bit representations. 
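To make the preceding comment concrete, a standalone sketch (not part of the test) of why a bitwise comparison cannot identify NaNs:

  #include <cmath>
  #include <cstring>

  float a = std::nanf("1");
  float b = std::nanf("2");  // also NaN, but with different payload bits
  // std::memcmp(&a, &b, sizeof(float)) is typically nonzero even though
  // std::isnan(a) && std::isnan(b) holds -- hence the std::isnan() check
  // on the parsed value below instead of AssertArraysEqual.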
+ ASSERT_OK(ArrayFromJSON(type, "[NaN]", &actual)); + ASSERT_OK(ValidateArray(*actual)); + float value = checked_cast(*actual).Value(0); + ASSERT_TRUE(std::isnan(value)); +} + +TEST(TestFloat, Errors) { + std::shared_ptr type = float32(); + std::shared_ptr array; + + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[true]", &array)); +} + +TEST(TestDouble, Basics) { + std::shared_ptr type = float64(); + std::shared_ptr expected, actual; + + AssertJSONArray(type, "[]", {}); + AssertJSONArray(type, "[1, 2.5, -3e4]", {1.0, 2.5, -3.0e4}); + AssertJSONArray(type, "[-0.0, Inf, -Inf, null]", {true, true, true, false}, + {-0.0, INFINITY, -INFINITY, 0.0}); + + ASSERT_OK(ArrayFromJSON(type, "[NaN]", &actual)); + ASSERT_OK(ValidateArray(*actual)); + double value = checked_cast(*actual).Value(0); + ASSERT_TRUE(std::isnan(value)); +} + +TEST(TestDouble, Errors) { + std::shared_ptr type = float64(); + std::shared_ptr array; + + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[true]", &array)); +} + +TEST(TestString, Basics) { + // String type + std::shared_ptr type = utf8(); + std::shared_ptr expected, actual; + + AssertJSONArray(type, "[]", {}); + AssertJSONArray(type, "[\"\", \"foo\"]", {"", "foo"}); + AssertJSONArray(type, "[\"\", null]", {true, false}, {"", ""}); + // NUL character in string + std::string s = "some"; + s += '\x00'; + s += "char"; + AssertJSONArray(type, "[\"\", \"some\\u0000char\"]", {"", s}); + // UTF8 sequence in string + AssertJSONArray(type, "[\"\xc3\xa9\"]", {"\xc3\xa9"}); + + // Binary type + type = binary(); + AssertJSONArray(type, "[\"\", \"foo\", null]", + {true, true, false}, {"", "foo", ""}); + // Arbitrary binary (non-UTF8) sequence in string + s = "\xff\x9f"; + AssertJSONArray(type, "[\"" + s + "\"]", {s}); + // Bytes < 0x20 can be represented as JSON unicode escapes + s = '\x00'; + s += "\x1f"; + AssertJSONArray(type, "[\"\\u0000\\u001f\"]", {s}); +} + +TEST(TestString, Errors) { + std::shared_ptr type = utf8(); + std::shared_ptr array; + + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0]", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[]]", &array)); +} + +TEST(TestFixedSizeBinary, Basics) { + std::shared_ptr type = fixed_size_binary(3); + std::shared_ptr expected, actual; + + AssertJSONArray(type, "[]", {}); + AssertJSONArray(type, "[\"foo\", \"bar\"]", + {"foo", "bar"}); + AssertJSONArray(type, "[null, \"foo\"]", + {false, true}, {"", "foo"}); + // Arbitrary binary (non-UTF8) sequence in string + std::string s = "\xff\x9f\xcc"; + AssertJSONArray(type, "[\"" + s + "\"]", {s}); +} + +TEST(TestFixedSizeBinary, Errors) { + std::shared_ptr type = fixed_size_binary(3); + std::shared_ptr array; + + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0]", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[]]", &array)); + // Invalid length + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"\"]", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"abcd\"]", &array)); +} + +TEST(TestDecimal, Basics) { + std::shared_ptr type = decimal(10, 4); + std::shared_ptr expected, actual; + + ASSERT_OK(ArrayFromJSON(type, "[]", &actual)); + ASSERT_OK(ValidateArray(*actual)); + { + Decimal128Builder builder(type); + ASSERT_OK(builder.Finish(&expected)); + } + AssertArraysEqual(*expected, *actual); + + ASSERT_OK(ArrayFromJSON(type, "[\"123.4567\", \"-78.9000\"]", &actual)); + ASSERT_OK(ValidateArray(*actual)); + { + Decimal128Builder builder(type); + ASSERT_OK(builder.Append(Decimal128(1234567))); + ASSERT_OK(builder.Append(Decimal128(-789000))); + 
ASSERT_OK(builder.Finish(&expected)); + } + AssertArraysEqual(*expected, *actual); + + ASSERT_OK(ArrayFromJSON(type, "[\"123.4567\", null]", &actual)); + ASSERT_OK(ValidateArray(*actual)); + { + Decimal128Builder builder(type); + ASSERT_OK(builder.Append(Decimal128(1234567))); + ASSERT_OK(builder.AppendNull()); + ASSERT_OK(builder.Finish(&expected)); + } + AssertArraysEqual(*expected, *actual); +} + +TEST(TestDecimal, Errors) { + std::shared_ptr type = decimal(10, 4); + std::shared_ptr array; + + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0]", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[12.3456]", &array)); + // Bad scale + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"12.345\"]", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"12.34560\"]", &array)); +} + +TEST(TestList, IntegerList) { + auto pool = default_memory_pool(); + std::shared_ptr type = list(int64()); + std::shared_ptr offsets, values, expected, actual; + + ASSERT_OK(ArrayFromJSON(type, "[]", &actual)); + ASSERT_OK(ValidateArray(*actual)); + ArrayFromVector({0}, &offsets); + ArrayFromVector({}, &values); + ASSERT_OK(ListArray::FromArrays(*offsets, *values, pool, &expected)); + AssertArraysEqual(*expected, *actual); + + ASSERT_OK(ArrayFromJSON(type, "[[4, 5], [], [6]]", &actual)); + ASSERT_OK(ValidateArray(*actual)); + ArrayFromVector({0, 2, 2, 3}, &offsets); + ArrayFromVector({4, 5, 6}, &values); + ASSERT_OK(ListArray::FromArrays(*offsets, *values, pool, &expected)); + AssertArraysEqual(*expected, *actual); + + ASSERT_OK(ArrayFromJSON(type, "[[], [null], [6, null]]", &actual)); + ASSERT_OK(ValidateArray(*actual)); + ArrayFromVector({0, 0, 1, 3}, &offsets); + auto is_valid = std::vector{false, true, false}; + ArrayFromVector(is_valid, {0, 6, 0}, &values); + ASSERT_OK(ListArray::FromArrays(*offsets, *values, pool, &expected)); + AssertArraysEqual(*expected, *actual); + + ASSERT_OK(ArrayFromJSON(type, "[null, [], null]", &actual)); + ASSERT_OK(ValidateArray(*actual)); + { + std::unique_ptr builder; + ASSERT_OK(MakeBuilder(pool, type, &builder)); + auto& list_builder = checked_cast(*builder); + ASSERT_OK(list_builder.AppendNull()); + ASSERT_OK(list_builder.Append()); + ASSERT_OK(list_builder.AppendNull()); + ASSERT_OK(list_builder.Finish(&expected)); + } + AssertArraysEqual(*expected, *actual); +} + +TEST(TestList, IntegerListErrors) { + std::shared_ptr type = list(int64()); + std::shared_ptr array; + + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0]", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0.0]]", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[9223372036854775808]]", &array)); +} + +TEST(TestList, NullList) { + auto pool = default_memory_pool(); + std::shared_ptr type = list(null()); + std::shared_ptr offsets, values, expected, actual; + + ASSERT_OK(ArrayFromJSON(type, "[]", &actual)); + ASSERT_OK(ValidateArray(*actual)); + ArrayFromVector({0}, &offsets); + values = std::make_shared(0); + ASSERT_OK(ListArray::FromArrays(*offsets, *values, pool, &expected)); + AssertArraysEqual(*expected, *actual); + + ASSERT_OK(ArrayFromJSON(type, "[[], [null], [null, null]]", &actual)); + ASSERT_OK(ValidateArray(*actual)); + ArrayFromVector({0, 0, 1, 3}, &offsets); + values = std::make_shared(3); + ASSERT_OK(ListArray::FromArrays(*offsets, *values, pool, &expected)); + AssertArraysEqual(*expected, *actual); + + ASSERT_OK(ArrayFromJSON(type, "[null, [], null]", &actual)); + ASSERT_OK(ValidateArray(*actual)); + { + std::unique_ptr builder; + ASSERT_OK(MakeBuilder(pool, type, &builder)); + 
auto& list_builder = checked_cast(*builder); + ASSERT_OK(list_builder.AppendNull()); + ASSERT_OK(list_builder.Append()); + ASSERT_OK(list_builder.AppendNull()); + ASSERT_OK(list_builder.Finish(&expected)); + } + AssertArraysEqual(*expected, *actual); +} + +TEST(TestList, IntegerListList) { + auto pool = default_memory_pool(); + std::shared_ptr type = list(list(uint8())); + std::shared_ptr offsets, values, nested, expected, actual; + + ASSERT_OK(ArrayFromJSON(type, "[[[4], [5, 6]], [[7, 8, 9]]]", &actual)); + ASSERT_OK(ValidateArray(*actual)); + ArrayFromVector({0, 1, 3, 6}, &offsets); + ArrayFromVector({4, 5, 6, 7, 8, 9}, &values); + ASSERT_OK(ListArray::FromArrays(*offsets, *values, pool, &nested)); + ArrayFromVector({0, 2, 3}, &offsets); + ASSERT_OK(ListArray::FromArrays(*offsets, *nested, pool, &expected)); + ASSERT_EQ(actual->length(), 2); + AssertArraysEqual(*expected, *actual); + + ASSERT_OK(ArrayFromJSON(type, "[[], [[]], [[4], [], [5, 6]], [[7, 8, 9]]]", &actual)); + ASSERT_OK(ValidateArray(*actual)); + ArrayFromVector({0, 0, 1, 1, 3, 6}, &offsets); + ArrayFromVector({4, 5, 6, 7, 8, 9}, &values); + ASSERT_OK(ListArray::FromArrays(*offsets, *values, pool, &nested)); + ArrayFromVector({0, 0, 1, 4, 5}, &offsets); + ASSERT_OK(ListArray::FromArrays(*offsets, *nested, pool, &expected)); + ASSERT_EQ(actual->length(), 4); + AssertArraysEqual(*expected, *actual); + + ASSERT_OK(ArrayFromJSON(type, "[null, [null], [[null]]]", &actual)); + ASSERT_OK(ValidateArray(*actual)); + { + std::unique_ptr builder; + ASSERT_OK(MakeBuilder(pool, type, &builder)); + auto& list_builder = checked_cast(*builder); + auto& child_builder = checked_cast(*list_builder.value_builder()); + ASSERT_OK(list_builder.AppendNull()); + ASSERT_OK(list_builder.Append()); + ASSERT_OK(child_builder.AppendNull()); + ASSERT_OK(list_builder.Append()); + ASSERT_OK(child_builder.Append()); + ASSERT_OK(list_builder.Finish(&expected)); + } +} + +TEST(TestStruct, SimpleStruct) { + auto field_a = field("a", int8()); + auto field_b = field("b", boolean()); + std::shared_ptr type = struct_({field_a, field_b}); + std::shared_ptr a, b, expected, actual; + std::shared_ptr null_bitmap; + std::vector is_valid; + std::vector> children; + + // Trivial + ASSERT_OK(ArrayFromJSON(type, "[]", &actual)); + ASSERT_OK(ValidateArray(*actual)); + ArrayFromVector({}, &a); + ArrayFromVector({}, &b); + children.assign({a, b}); + expected = std::make_shared(type, 0, children); + AssertArraysEqual(*expected, *actual); + + // Non-empty + ArrayFromVector({5, 6}, &a); + ArrayFromVector({true, false}, &b); + children.assign({a, b}); + expected = std::make_shared(type, 2, children); + + ASSERT_OK(ArrayFromJSON(type, "[[5, true], [6, false]]", &actual)); + ASSERT_OK(ValidateArray(*actual)); + AssertArraysEqual(*expected, *actual); + ASSERT_OK(ArrayFromJSON(type, "[{\"a\": 5, \"b\": true}, {\"b\": false, \"a\": 6}]", + &actual)); + ASSERT_OK(ValidateArray(*actual)); + AssertArraysEqual(*expected, *actual); + + // With nulls + is_valid = {false, true, false, false}; + ArrayFromVector(is_valid, {0, 5, 6, 0}, &a); + is_valid = {false, false, true, false}; + ArrayFromVector(is_valid, {false, true, false, false}, &b); + children.assign({a, b}); + BitmapFromVector({false, true, true, true}, &null_bitmap); + expected = std::make_shared(type, 4, children, null_bitmap, 1); + + ASSERT_OK( + ArrayFromJSON(type, "[null, [5, null], [null, false], [null, null]]", &actual)); + ASSERT_OK(ValidateArray(*actual)); + AssertArraysEqual(*expected, *actual); + // When using object 
notation, null members can be omitted + ASSERT_OK(ArrayFromJSON(type, "[null, {\"a\": 5, \"b\": null}, {\"b\": false}, {}]", + &actual)); + ASSERT_OK(ValidateArray(*actual)); + AssertArraysEqual(*expected, *actual); +} + +TEST(TestStruct, NestedStruct) { + auto field_a = field("a", int8()); + auto field_b = field("b", boolean()); + auto field_c = field("c", float64()); + std::shared_ptr nested_type = struct_({field_a, field_b}); + auto field_nested = field("nested", nested_type); + std::shared_ptr type = struct_({field_nested, field_c}); + std::shared_ptr expected, actual; + std::shared_ptr null_bitmap; + std::vector is_valid; + std::vector> children(2); + + ASSERT_OK(ArrayFromJSON(type, "[]", &actual)); + ASSERT_OK(ValidateArray(*actual)); + ArrayFromVector({}, &children[0]); + ArrayFromVector({}, &children[1]); + children[0] = std::make_shared(nested_type, 0, children); + ArrayFromVector({}, &children[1]); + expected = std::make_shared(type, 0, children); + AssertArraysEqual(*expected, *actual); + + ASSERT_OK(ArrayFromJSON(type, "[[[5, true], 1.5], [[6, false], -3e2]]", &actual)); + ASSERT_OK(ValidateArray(*actual)); + ArrayFromVector({5, 6}, &children[0]); + ArrayFromVector({true, false}, &children[1]); + children[0] = std::make_shared(nested_type, 2, children); + ArrayFromVector({1.5, -300.0}, &children[1]); + expected = std::make_shared(type, 2, children); + AssertArraysEqual(*expected, *actual); + + ASSERT_OK(ArrayFromJSON(type, "[null, [[5, null], null], [null, -3e2]]", &actual)); + ASSERT_OK(ValidateArray(*actual)); + is_valid = {false, true, false}; + ArrayFromVector(is_valid, {0, 5, 0}, &children[0]); + is_valid = {false, false, false}; + ArrayFromVector(is_valid, {false, false, false}, &children[1]); + BitmapFromVector({false, true, false}, &null_bitmap); + children[0] = std::make_shared(nested_type, 3, children, null_bitmap, 2); + is_valid = {false, false, true}; + ArrayFromVector(is_valid, {0.0, 0.0, -300.0}, &children[1]); + BitmapFromVector({false, true, true}, &null_bitmap); + expected = std::make_shared(type, 3, children, null_bitmap, 1); + AssertArraysEqual(*expected, *actual); +} + +TEST(TestStruct, Errors) { + auto field_a = field("a", int8()); + auto field_b = field("b", boolean()); + std::shared_ptr type = struct_({field_a, field_b}); + std::shared_ptr array; + + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0, true]", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0]]", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0, true, 1]]", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[true, 0]]", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[{\"b\": 0, \"a\": true}]", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[{\"c\": 0}]", &array)); +} + +} // namespace json +} // namespace internal +} // namespace ipc +} // namespace arrow diff --git a/cpp/src/arrow/ipc/json-simple.cc b/cpp/src/arrow/ipc/json-simple.cc new file mode 100644 index 0000000000000..047788ce0f5de --- /dev/null +++ b/cpp/src/arrow/ipc/json-simple.cc @@ -0,0 +1,540 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <memory>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/builder.h"
+#include "arrow/ipc/json-internal.h"
+#include "arrow/ipc/json-simple.h"
+#include "arrow/memory_pool.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/decimal.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/string_view.h"
+
+namespace arrow {
+namespace ipc {
+namespace internal {
+namespace json {
+
+using ::arrow::internal::checked_cast;
+
+static constexpr auto kParseFlags = rj::kParseFullPrecisionFlag | rj::kParseNanAndInfFlag;
+
+static Status JSONTypeError(const char* expected_type, rj::Type json_type) {
+  return Status::Invalid("Expected ", expected_type, " or null, got JSON type ",
+                         json_type);
+}
+
+class Converter {
+ public:
+  virtual ~Converter() = default;
+
+  virtual Status Init() { return Status::OK(); }
+
+  virtual Status AppendValue(const rj::Value& json_obj) = 0;
+
+  virtual Status AppendNull() = 0;
+
+  virtual Status AppendValues(const rj::Value& json_array) = 0;
+
+  virtual std::shared_ptr<ArrayBuilder> builder() = 0;
+
+  virtual Status Finish(std::shared_ptr<Array>* out) {
+    auto builder = this->builder();
+    if (builder->length() == 0) {
+      // Make sure the builder was initialized
+      RETURN_NOT_OK(builder->Resize(1));
+    }
+    return builder->Finish(out);
+  }
+
+ protected:
+  std::shared_ptr<DataType> type_;
+};
+
+Status GetConverter(const std::shared_ptr<DataType>&, std::shared_ptr<Converter>* out);
+
+// CRTP
+template <typename T>
+class ConcreteConverter : public Converter {
+ public:
+  Status AppendValues(const rj::Value& json_array) override {
+    auto self = static_cast<T*>(this);
+    if (!json_array.IsArray()) {
+      return JSONTypeError("array", json_array.GetType());
+    }
+    auto size = json_array.Size();
+    for (uint32_t i = 0; i < size; ++i) {
+      RETURN_NOT_OK(self->AppendValue(json_array[i]));
+    }
+    return Status::OK();
+  }
+};
+
+// TODO: dates and times?
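Before the individual converters, a quick usage sketch of the entry point this file ultimately serves (the ArrayFromJSON overloads declared in json-simple.h later in this patch; error handling elided):

  #include "arrow/ipc/json-simple.h"

  std::shared_ptr<arrow::Array> array;
  // Produces an Int32Array of length 3 whose middle slot is null.
  arrow::Status st = arrow::ipc::internal::json::ArrayFromJSON(
      arrow::int32(), "[1, null, 2]", &array);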
+ +// ------------------------------------------------------------------------ +// Converter for null arrays + +class NullConverter final : public ConcreteConverter { + public: + explicit NullConverter(const std::shared_ptr& type) { + type_ = type; + builder_ = std::make_shared(); + } + + Status AppendNull() override { return builder_->AppendNull(); } + + Status AppendValue(const rj::Value& json_obj) override { + if (json_obj.IsNull()) { + return AppendNull(); + } + return JSONTypeError("null", json_obj.GetType()); + } + + std::shared_ptr builder() override { return builder_; } + + protected: + std::shared_ptr builder_; +}; + +// ------------------------------------------------------------------------ +// Converter for boolean arrays + +class BooleanConverter final : public ConcreteConverter { + public: + explicit BooleanConverter(const std::shared_ptr& type) { + type_ = type; + builder_ = std::make_shared(); + } + + Status AppendNull() override { return builder_->AppendNull(); } + + Status AppendValue(const rj::Value& json_obj) override { + if (json_obj.IsNull()) { + return AppendNull(); + } + if (json_obj.IsBool()) { + return builder_->Append(json_obj.GetBool()); + } + return JSONTypeError("boolean", json_obj.GetType()); + } + + std::shared_ptr builder() override { return builder_; } + + protected: + std::shared_ptr builder_; +}; + +// ------------------------------------------------------------------------ +// Converter for int arrays + +template +class IntegerConverter final : public ConcreteConverter> { + using c_type = typename Type::c_type; + static constexpr auto is_signed = std::is_signed::value; + + public: + explicit IntegerConverter(const std::shared_ptr& type) { + this->type_ = type; + builder_ = std::make_shared>(); + } + + Status AppendNull() override { return builder_->AppendNull(); } + + Status AppendValue(const rj::Value& json_obj) override { + if (json_obj.IsNull()) { + return AppendNull(); + } + return AppendNumber(json_obj); + } + + std::shared_ptr builder() override { return builder_; } + + protected: + // Append signed integer value + template + typename std::enable_if::value, Status>::type AppendNumber( + const rj::Value& json_obj) { + if (json_obj.IsInt64()) { + int64_t v64 = json_obj.GetInt64(); + c_type v = static_cast(v64); + if (v == v64) { + return builder_->Append(v); + } else { + return Status::Invalid("Value ", v64, " out of bounds for ", + this->type_->ToString()); + } + } else { + return JSONTypeError("signed int", json_obj.GetType()); + } + } + + // Append unsigned integer value + template + typename std::enable_if::value, Status>::type AppendNumber( + const rj::Value& json_obj) { + if (json_obj.IsUint64()) { + uint64_t v64 = json_obj.GetUint64(); + c_type v = static_cast(v64); + if (v == v64) { + return builder_->Append(v); + } else { + return Status::Invalid("Value ", v64, " out of bounds for ", + this->type_->ToString()); + } + return builder_->Append(v); + } else { + return JSONTypeError("unsigned int", json_obj.GetType()); + } + } + + std::shared_ptr> builder_; +}; + +// ------------------------------------------------------------------------ +// Converter for float arrays + +template +class FloatConverter final : public ConcreteConverter> { + using c_type = typename Type::c_type; + + public: + explicit FloatConverter(const std::shared_ptr& type) { + this->type_ = type; + builder_ = std::make_shared>(); + } + + Status AppendNull() override { return builder_->AppendNull(); } + + Status AppendValue(const rj::Value& json_obj) override { + if 
(json_obj.IsNull()) { + return AppendNull(); + } + if (json_obj.IsNumber()) { + c_type v = static_cast(json_obj.GetDouble()); + return builder_->Append(v); + } else { + return JSONTypeError("number", json_obj.GetType()); + } + } + + std::shared_ptr builder() override { return builder_; } + + protected: + std::shared_ptr> builder_; +}; + +// ------------------------------------------------------------------------ +// Converter for decimal arrays + +class DecimalConverter final : public ConcreteConverter { + public: + explicit DecimalConverter(const std::shared_ptr& type) { + this->type_ = type; + decimal_type_ = checked_cast(type.get()); + builder_ = std::make_shared(type); + } + + Status AppendNull() override { return builder_->AppendNull(); } + + Status AppendValue(const rj::Value& json_obj) override { + if (json_obj.IsNull()) { + return AppendNull(); + } + if (json_obj.IsString()) { + int32_t precision, scale; + Decimal128 d; + auto view = util::string_view(json_obj.GetString(), json_obj.GetStringLength()); + RETURN_NOT_OK(Decimal128::FromString(view, &d, &precision, &scale)); + if (scale != decimal_type_->scale()) { + return Status::Invalid("Invalid scale for decimal: expected ", + decimal_type_->scale(), ", got ", scale); + } + return builder_->Append(d); + } + return JSONTypeError("decimal string", json_obj.GetType()); + } + + std::shared_ptr builder() override { return builder_; } + + protected: + std::shared_ptr builder_; + Decimal128Type* decimal_type_; +}; + +// ------------------------------------------------------------------------ +// Converter for binary and string arrays + +class StringConverter final : public ConcreteConverter { + public: + explicit StringConverter(const std::shared_ptr& type) { + this->type_ = type; + builder_ = std::make_shared(type, default_memory_pool()); + } + + Status AppendNull() override { return builder_->AppendNull(); } + + Status AppendValue(const rj::Value& json_obj) override { + if (json_obj.IsNull()) { + return AppendNull(); + } + if (json_obj.IsString()) { + auto view = util::string_view(json_obj.GetString(), json_obj.GetStringLength()); + return builder_->Append(view); + } else { + return JSONTypeError("string", json_obj.GetType()); + } + } + + std::shared_ptr builder() override { return builder_; } + + protected: + std::shared_ptr builder_; +}; + +// ------------------------------------------------------------------------ +// Converter for fixed-size binary arrays + +class FixedSizeBinaryConverter final + : public ConcreteConverter { + public: + explicit FixedSizeBinaryConverter(const std::shared_ptr& type) { + this->type_ = type; + builder_ = std::make_shared(type, default_memory_pool()); + } + + Status AppendNull() override { return builder_->AppendNull(); } + + Status AppendValue(const rj::Value& json_obj) override { + if (json_obj.IsNull()) { + return AppendNull(); + } + if (json_obj.IsString()) { + auto view = util::string_view(json_obj.GetString(), json_obj.GetStringLength()); + if (view.length() != static_cast(builder_->byte_width())) { + std::stringstream ss; + ss << "Invalid string length " << view.length() << " in JSON input for " + << this->type_->ToString(); + return Status::Invalid(ss.str()); + } + return builder_->Append(view); + } else { + return JSONTypeError("string", json_obj.GetType()); + } + } + + std::shared_ptr builder() override { return builder_; } + + protected: + std::shared_ptr builder_; +}; + +// ------------------------------------------------------------------------ +// Converter for list arrays + +class 
ListConverter final : public ConcreteConverter { + public: + explicit ListConverter(const std::shared_ptr& type) { type_ = type; } + + Status Init() override { + const auto& list_type = checked_cast(*type_); + RETURN_NOT_OK(GetConverter(list_type.value_type(), &child_converter_)); + auto child_builder = child_converter_->builder(); + builder_ = std::make_shared(default_memory_pool(), child_builder, type_); + return Status::OK(); + } + + Status AppendNull() override { return builder_->AppendNull(); } + + Status AppendValue(const rj::Value& json_obj) override { + if (json_obj.IsNull()) { + return AppendNull(); + } + RETURN_NOT_OK(builder_->Append()); + // Extend the child converter with this JSON array + return child_converter_->AppendValues(json_obj); + } + + std::shared_ptr builder() override { return builder_; } + + protected: + std::shared_ptr builder_; + std::shared_ptr child_converter_; +}; + +// ------------------------------------------------------------------------ +// Converter for struct arrays + +class StructConverter final : public ConcreteConverter { + public: + explicit StructConverter(const std::shared_ptr& type) { type_ = type; } + + Status Init() override { + std::vector> child_builders; + for (const auto& field : type_->children()) { + std::shared_ptr child_converter; + RETURN_NOT_OK(GetConverter(field->type(), &child_converter)); + child_converters_.push_back(child_converter); + child_builders.push_back(child_converter->builder()); + } + builder_ = std::make_shared(type_, default_memory_pool(), + std::move(child_builders)); + return Status::OK(); + } + + Status AppendNull() override { + for (auto& converter : child_converters_) { + RETURN_NOT_OK(converter->AppendNull()); + } + return builder_->AppendNull(); + } + + // Append a JSON value that is either an array of N elements in order + // or an object mapping struct names to values (omitted struct members + // are mapped to null). 
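To make the comment above concrete (an illustration mirroring the SimpleStruct test earlier in this patch), for a struct<a: int8, b: bool> field the following inputs are equivalent:

  // Positional form:  [[5, true], [6, false]]
  // Object form:      [{"a": 5, "b": true}, {"b": false, "a": 6}]
  // Omitted members:  [{"a": 5}]  ->  "b" comes out null in that row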
+  Status AppendValue(const rj::Value& json_obj) override {
+    if (json_obj.IsNull()) {
+      return AppendNull();
+    }
+    if (json_obj.IsArray()) {
+      auto size = json_obj.Size();
+      auto expected_size = static_cast<uint32_t>(type_->num_children());
+      if (size != expected_size) {
+        return Status::Invalid("Expected array of size ", expected_size,
+                               ", got array of size ", size);
+      }
+      for (uint32_t i = 0; i < size; ++i) {
+        RETURN_NOT_OK(child_converters_[i]->AppendValue(json_obj[i]));
+      }
+      return builder_->Append();
+    }
+    if (json_obj.IsObject()) {
+      auto remaining = json_obj.MemberCount();
+      auto num_children = type_->num_children();
+      for (int32_t i = 0; i < num_children; ++i) {
+        const auto& field = type_->child(i);
+        auto it = json_obj.FindMember(field->name());
+        if (it != json_obj.MemberEnd()) {
+          --remaining;
+          RETURN_NOT_OK(child_converters_[i]->AppendValue(it->value));
+        } else {
+          RETURN_NOT_OK(child_converters_[i]->AppendNull());
+        }
+      }
+      if (remaining > 0) {
+        return Status::Invalid("Unexpected members in JSON object for type ",
+                               type_->ToString());
+      }
+      return builder_->Append();
+    }
+    return JSONTypeError("array or object", json_obj.GetType());
+  }
+
+  std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
+
+ protected:
+  std::shared_ptr<StructBuilder> builder_;
+  std::vector<std::shared_ptr<Converter>> child_converters_;
+};
+
+// ------------------------------------------------------------------------
+// General conversion functions
+
+Status GetConverter(const std::shared_ptr<DataType>& type,
+                    std::shared_ptr<Converter>* out) {
+  std::shared_ptr<Converter> res;
+
+#define SIMPLE_CONVERTER_CASE(ID, CLASS)  \
+  case ID:                                \
+    res = std::make_shared<CLASS>(type);  \
+    break;
+
+  switch (type->id()) {
+    SIMPLE_CONVERTER_CASE(Type::INT8, IntegerConverter<Int8Type>)
+    SIMPLE_CONVERTER_CASE(Type::INT16, IntegerConverter<Int16Type>)
+    SIMPLE_CONVERTER_CASE(Type::INT32, IntegerConverter<Int32Type>)
+    SIMPLE_CONVERTER_CASE(Type::TIME32, IntegerConverter<Int32Type>)
+    SIMPLE_CONVERTER_CASE(Type::DATE32, IntegerConverter<Int32Type>)
+    SIMPLE_CONVERTER_CASE(Type::INT64, IntegerConverter<Int64Type>)
+    SIMPLE_CONVERTER_CASE(Type::TIME64, IntegerConverter<Int64Type>)
+    SIMPLE_CONVERTER_CASE(Type::TIMESTAMP, IntegerConverter<Int64Type>)
+    SIMPLE_CONVERTER_CASE(Type::DATE64, IntegerConverter<Int64Type>)
+    SIMPLE_CONVERTER_CASE(Type::UINT8, IntegerConverter<UInt8Type>)
+    SIMPLE_CONVERTER_CASE(Type::UINT16, IntegerConverter<UInt16Type>)
+    SIMPLE_CONVERTER_CASE(Type::UINT32, IntegerConverter<UInt32Type>)
+    SIMPLE_CONVERTER_CASE(Type::UINT64, IntegerConverter<UInt64Type>)
+    SIMPLE_CONVERTER_CASE(Type::NA, NullConverter)
+    SIMPLE_CONVERTER_CASE(Type::BOOL, BooleanConverter)
+    SIMPLE_CONVERTER_CASE(Type::FLOAT, FloatConverter<FloatType>)
+    SIMPLE_CONVERTER_CASE(Type::DOUBLE, FloatConverter<DoubleType>)
+    SIMPLE_CONVERTER_CASE(Type::LIST, ListConverter)
+    SIMPLE_CONVERTER_CASE(Type::STRUCT, StructConverter)
+    SIMPLE_CONVERTER_CASE(Type::STRING, StringConverter)
+    SIMPLE_CONVERTER_CASE(Type::BINARY, StringConverter)
+    SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryConverter)
+    SIMPLE_CONVERTER_CASE(Type::DECIMAL, DecimalConverter)
+    default: {
+      return Status::NotImplemented("JSON conversion to ", type->ToString(),
+                                    " not implemented");
+    }
+  }
+
+#undef SIMPLE_CONVERTER_CASE
+
+  RETURN_NOT_OK(res->Init());
+  *out = res;
+  return Status::OK();
+}
+
+Status ArrayFromJSON(const std::shared_ptr<DataType>& type,
+                     const util::string_view& json_string, std::shared_ptr<Array>* out) {
+  std::shared_ptr<Converter> converter;
+  RETURN_NOT_OK(GetConverter(type, &converter));
+
+  rj::Document json_doc;
+  json_doc.Parse<kParseFlags>(json_string.data(), json_string.length());
+  if (json_doc.HasParseError()) {
+    return Status::Invalid("JSON parse error at offset ", json_doc.GetErrorOffset(), ": 
", + GetParseError_En(json_doc.GetParseError())); + } + + // The JSON document should be an array, append it + RETURN_NOT_OK(converter->AppendValues(json_doc)); + return converter->Finish(out); +} + +Status ArrayFromJSON(const std::shared_ptr& type, + const std::string& json_string, std::shared_ptr* out) { + return ArrayFromJSON(type, util::string_view(json_string), out); +} + +Status ArrayFromJSON(const std::shared_ptr& type, const char* json_string, + std::shared_ptr* out) { + return ArrayFromJSON(type, util::string_view(json_string), out); +} + +} // namespace json +} // namespace internal +} // namespace ipc +} // namespace arrow diff --git a/cpp/src/arrow/ipc/json-simple.h b/cpp/src/arrow/ipc/json-simple.h new file mode 100644 index 0000000000000..da6483ff1556f --- /dev/null +++ b/cpp/src/arrow/ipc/json-simple.h @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Implement a simple JSON representation format for arrays + +#ifndef ARROW_IPC_JSON_SIMPLE_H +#define ARROW_IPC_JSON_SIMPLE_H + +#include +#include + +#include "arrow/status.h" +#include "arrow/util/string_view.h" +#include "arrow/util/visibility.h" + +namespace arrow { + +class Array; +class DataType; + +namespace ipc { +namespace internal { +namespace json { + +ARROW_EXPORT +Status ArrayFromJSON(const std::shared_ptr&, const std::string& json, + std::shared_ptr* out); + +ARROW_EXPORT +Status ArrayFromJSON(const std::shared_ptr&, const util::string_view& json, + std::shared_ptr* out); + +ARROW_EXPORT +Status ArrayFromJSON(const std::shared_ptr&, const char* json, + std::shared_ptr* out); + +} // namespace json +} // namespace internal +} // namespace ipc +} // namespace arrow + +#endif // ARROW_IPC_JSON_SIMPLE_H diff --git a/cpp/src/arrow/ipc/ipc-json-test.cc b/cpp/src/arrow/ipc/json-test.cc similarity index 100% rename from cpp/src/arrow/ipc/ipc-json-test.cc rename to cpp/src/arrow/ipc/json-test.cc diff --git a/cpp/src/arrow/ipc/json.cc b/cpp/src/arrow/ipc/json.cc index 394563c53c09d..61c242ca2dbbb 100644 --- a/cpp/src/arrow/ipc/json.cc +++ b/cpp/src/arrow/ipc/json.cc @@ -99,7 +99,7 @@ Status JsonWriter::WriteRecordBatch(const RecordBatch& batch) { class JsonReader::JsonReaderImpl { public: JsonReaderImpl(MemoryPool* pool, const std::shared_ptr& data) - : pool_(pool), data_(data) {} + : pool_(pool), data_(data), record_batches_(nullptr) {} Status ParseAndReadSchema() { doc_.Parse(reinterpret_cast(data_->data()), diff --git a/cpp/src/arrow/ipc/message.cc b/cpp/src/arrow/ipc/message.cc index 724e6255cbddb..23709a4619207 100644 --- a/cpp/src/arrow/ipc/message.cc +++ b/cpp/src/arrow/ipc/message.cc @@ -63,6 +63,8 @@ class Message::MessageImpl { return Message::RECORD_BATCH; case flatbuf::MessageHeader_Tensor: return Message::TENSOR; + case 
flatbuf::MessageHeader_SparseTensor: + return Message::SPARSE_TENSOR; default: return Message::NONE; } @@ -153,10 +155,8 @@ Status Message::ReadFrom(const std::shared_ptr& metadata, io::InputStrea std::shared_ptr body; RETURN_NOT_OK(stream->Read(body_length, &body)); if (body->size() < body_length) { - std::stringstream ss; - ss << "Expected to be able to read " << body_length << " bytes for message body, got " - << body->size(); - return Status::IOError(ss.str()); + return Status::IOError("Expected to be able to read ", body_length, + " bytes for message body, got ", body->size()); } return Message::Open(metadata, body, out); @@ -171,10 +171,8 @@ Status Message::ReadFrom(const int64_t offset, const std::shared_ptr& me std::shared_ptr body; RETURN_NOT_OK(file->ReadAt(offset, body_length, &body)); if (body->size() < body_length) { - std::stringstream ss; - ss << "Expected to be able to read " << body_length << " bytes for message body, got " - << body->size(); - return Status::IOError(ss.str()); + return Status::IOError("Expected to be able to read ", body_length, + " bytes for message body, got ", body->size()); } return Message::Open(metadata, body, out); @@ -238,19 +236,16 @@ Status ReadMessage(int64_t offset, int32_t metadata_length, io::RandomAccessFile RETURN_NOT_OK(file->ReadAt(offset, metadata_length, &buffer)); if (buffer->size() < metadata_length) { - std::stringstream ss; - ss << "Expected to read " << metadata_length << " metadata bytes but got " - << buffer->size(); - return Status::Invalid(ss.str()); + return Status::Invalid("Expected to read ", metadata_length, + " metadata bytes but got ", buffer->size()); } int32_t flatbuffer_size = *reinterpret_cast(buffer->data()); if (flatbuffer_size + static_cast(sizeof(int32_t)) > metadata_length) { - std::stringstream ss; - ss << "flatbuffer size " << metadata_length << " invalid. File offset: " << offset - << ", metadata length: " << metadata_length; - return Status::Invalid(ss.str()); + return Status::Invalid("flatbuffer size ", metadata_length, + " invalid. 
File offset: ", offset, + ", metadata length: ", metadata_length); } auto metadata = SliceBuffer(buffer, 4, buffer->size() - 4); @@ -303,10 +298,8 @@ Status ReadMessage(io::InputStream* file, std::unique_ptr* message) { std::shared_ptr metadata; RETURN_NOT_OK(file->Read(message_length, &metadata)); if (metadata->size() != message_length) { - std::stringstream ss; - ss << "Expected to read " << message_length << " metadata bytes, but " - << "only read " << metadata->size(); - return Status::Invalid(ss.str()); + return Status::Invalid("Expected to read ", message_length, " metadata bytes, but ", + "only read ", metadata->size()); } return Message::ReadFrom(metadata, file, message); diff --git a/cpp/src/arrow/ipc/message.h b/cpp/src/arrow/ipc/message.h index 092a19ff9a0cf..760012d1a6878 100644 --- a/cpp/src/arrow/ipc/message.h +++ b/cpp/src/arrow/ipc/message.h @@ -70,7 +70,7 @@ constexpr int kMaxNestingDepth = 64; /// \brief An IPC message including metadata and body class ARROW_EXPORT Message { public: - enum Type { NONE, SCHEMA, DICTIONARY_BATCH, RECORD_BATCH, TENSOR }; + enum Type { NONE, SCHEMA, DICTIONARY_BATCH, RECORD_BATCH, TENSOR, SPARSE_TENSOR }; /// \brief Construct message, but do not validate /// diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc index ef189c8ae617a..da6711395f8ea 100644 --- a/cpp/src/arrow/ipc/metadata-internal.cc +++ b/cpp/src/arrow/ipc/metadata-internal.cc @@ -31,6 +31,7 @@ #include "arrow/ipc/Tensor_generated.h" // IWYU pragma: keep #include "arrow/ipc/message.h" #include "arrow/ipc/util.h" +#include "arrow/sparse_tensor.h" #include "arrow/status.h" #include "arrow/tensor.h" #include "arrow/type.h" @@ -50,6 +51,7 @@ using DictionaryOffset = flatbuffers::Offset; using FieldOffset = flatbuffers::Offset; using KeyValueOffset = flatbuffers::Offset; using RecordBatchOffset = flatbuffers::Offset; +using SparseTensorOffset = flatbuffers::Offset; using Offset = flatbuffers::Offset; using FBString = flatbuffers::Offset; @@ -443,9 +445,7 @@ static Status TypeToFlatbuffer(FBB& fbb, const DataType& type, return UnionToFlatBuffer(fbb, *value_type, children, dictionary_memo, offset); default: *out_type = flatbuf::Type_NONE; // Make clang-tidy happy - std::stringstream ss; - ss << "Unable to convert type: " << type.ToString() << std::endl; - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("Unable to convert type: ", type.ToString()); } return Status::OK(); } @@ -483,9 +483,7 @@ static Status TensorTypeToFlatbuffer(FBB& fbb, const DataType& type, break; default: *out_type = flatbuf::Type_NONE; // Make clang-tidy happy - std::stringstream ss; - ss << "Unable to convert type: " << type.ToString() << std::endl; - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("Unable to convert type: ", type.ToString()); } return Status::OK(); } @@ -785,6 +783,106 @@ Status WriteTensorMessage(const Tensor& tensor, int64_t buffer_start_offset, body_length, out); } +Status MakeSparseTensorIndexCOO(FBB& fbb, const SparseCOOIndex& sparse_index, + const std::vector& buffers, + flatbuf::SparseTensorIndex* fb_sparse_index_type, + Offset* fb_sparse_index, size_t* num_buffers) { + *fb_sparse_index_type = flatbuf::SparseTensorIndex_SparseTensorIndexCOO; + const BufferMetadata& indices_metadata = buffers[0]; + flatbuf::Buffer indices(indices_metadata.offset, indices_metadata.length); + *fb_sparse_index = flatbuf::CreateSparseTensorIndexCOO(fbb, &indices).Union(); + *num_buffers = 1; + return Status::OK(); +} + 
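MakeSparseTensorIndexCOO above and its CSR counterpart below build the flatbuffer index tables that WriteSparseTensorMessage serializes. For orientation, this is roughly how the new write and read entry points fit together from user code. A minimal sketch only: it assumes the in-memory streams from arrow/io/memory.h and the BufferOutputStream::Create factory, and most of the setup is illustrative rather than taken from the patch:

    #include "arrow/buffer.h"
    #include "arrow/io/memory.h"
    #include "arrow/ipc/reader.h"
    #include "arrow/ipc/writer.h"
    #include "arrow/memory_pool.h"
    #include "arrow/sparse_tensor.h"
    #include "arrow/tensor.h"

    arrow::Status RoundTripCOO(std::shared_ptr<arrow::SparseTensor>* out) {
      // A dense 2x3 tensor with two non-zero values...
      std::vector<int64_t> values = {0, 0, 5, 0, 7, 0};
      auto data = arrow::Buffer::Wrap(values);
      arrow::NumericTensor<arrow::Int64Type> dense(data, {2, 3});

      // ...converted to COO form: only the non-zeros and their coordinates remain.
      arrow::SparseTensorImpl<arrow::SparseCOOIndex> sparse(dense);

      // Write as an encapsulated IPC message; a fresh stream starts aligned.
      std::shared_ptr<arrow::io::BufferOutputStream> sink;
      ARROW_RETURN_NOT_OK(arrow::io::BufferOutputStream::Create(
          1 << 16, arrow::default_memory_pool(), &sink));
      int32_t metadata_length = 0;
      int64_t body_length = 0;
      ARROW_RETURN_NOT_OK(arrow::ipc::WriteSparseTensor(
          sparse, sink.get(), &metadata_length, &body_length,
          arrow::default_memory_pool()));

      // Read it back from the in-memory buffer.
      std::shared_ptr<arrow::Buffer> buffer;
      ARROW_RETURN_NOT_OK(sink->Finish(&buffer));
      arrow::io::BufferReader source(buffer);
      return arrow::ipc::ReadSparseTensor(&source, out);
    }
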
+Status MakeSparseMatrixIndexCSR(FBB& fbb, const SparseCSRIndex& sparse_index,
+                                const std::vector<BufferMetadata>& buffers,
+                                flatbuf::SparseTensorIndex* fb_sparse_index_type,
+                                Offset<void>* fb_sparse_index, size_t* num_buffers) {
+  *fb_sparse_index_type = flatbuf::SparseTensorIndex_SparseMatrixIndexCSR;
+  const BufferMetadata& indptr_metadata = buffers[0];
+  const BufferMetadata& indices_metadata = buffers[1];
+  flatbuf::Buffer indptr(indptr_metadata.offset, indptr_metadata.length);
+  flatbuf::Buffer indices(indices_metadata.offset, indices_metadata.length);
+  *fb_sparse_index = flatbuf::CreateSparseMatrixIndexCSR(fbb, &indptr, &indices).Union();
+  *num_buffers = 2;
+  return Status::OK();
+}
+
+Status MakeSparseTensorIndex(FBB& fbb, const SparseIndex& sparse_index,
+                             const std::vector<BufferMetadata>& buffers,
+                             flatbuf::SparseTensorIndex* fb_sparse_index_type,
+                             Offset<void>* fb_sparse_index, size_t* num_buffers) {
+  switch (sparse_index.format_id()) {
+    case SparseTensorFormat::COO:
+      RETURN_NOT_OK(MakeSparseTensorIndexCOO(
+          fbb, checked_cast<const SparseCOOIndex&>(sparse_index), buffers,
+          fb_sparse_index_type, fb_sparse_index, num_buffers));
+      break;
+
+    case SparseTensorFormat::CSR:
+      RETURN_NOT_OK(MakeSparseMatrixIndexCSR(
+          fbb, checked_cast<const SparseCSRIndex&>(sparse_index), buffers,
+          fb_sparse_index_type, fb_sparse_index, num_buffers));
+      break;
+
+    default:
+      return Status::NotImplemented("Unsupported sparse tensor format: ",
+                                    sparse_index.ToString());
+  }
+
+  return Status::OK();
+}
+
+Status MakeSparseTensor(FBB& fbb, const SparseTensor& sparse_tensor, int64_t body_length,
+                        const std::vector<BufferMetadata>& buffers,
+                        SparseTensorOffset* offset) {
+  flatbuf::Type fb_type_type;
+  Offset<void> fb_type;
+  RETURN_NOT_OK(
+      TensorTypeToFlatbuffer(fbb, *sparse_tensor.type(), &fb_type_type, &fb_type));
+
+  using TensorDimOffset = flatbuffers::Offset<flatbuf::TensorDim>;
+  std::vector<TensorDimOffset> dims;
+  for (int i = 0; i < sparse_tensor.ndim(); ++i) {
+    FBString name = fbb.CreateString(sparse_tensor.dim_name(i));
+    dims.push_back(flatbuf::CreateTensorDim(fbb, sparse_tensor.shape()[i], name));
+  }
+
+  auto fb_shape = fbb.CreateVector(dims);
+
+  flatbuf::SparseTensorIndex fb_sparse_index_type;
+  Offset<void> fb_sparse_index;
+  size_t num_index_buffers = 0;
+  RETURN_NOT_OK(MakeSparseTensorIndex(fbb, *sparse_tensor.sparse_index(), buffers,
+                                      &fb_sparse_index_type, &fb_sparse_index,
+                                      &num_index_buffers));
+
+  const BufferMetadata& data_metadata = buffers[num_index_buffers];
+  flatbuf::Buffer data(data_metadata.offset, data_metadata.length);
+
+  const int64_t non_zero_length = sparse_tensor.non_zero_length();
+
+  *offset =
+      flatbuf::CreateSparseTensor(fbb, fb_type_type, fb_type, fb_shape, non_zero_length,
+                                  fb_sparse_index_type, fb_sparse_index, &data);
+
+  return Status::OK();
+}
+
+Status WriteSparseTensorMessage(const SparseTensor& sparse_tensor, int64_t body_length,
+                                const std::vector<BufferMetadata>& buffers,
+                                std::shared_ptr<Buffer>* out) {
+  FBB fbb;
+  SparseTensorOffset fb_sparse_tensor;
+  RETURN_NOT_OK(
+      MakeSparseTensor(fbb, sparse_tensor, body_length, buffers, &fb_sparse_tensor));
+  return WriteFBMessage(fbb, flatbuf::MessageHeader_SparseTensor,
+                        fb_sparse_tensor.Union(), body_length, out);
+}
+
 Status WriteDictionaryMessage(int64_t id, int64_t length, int64_t body_length,
                               const std::vector<FieldMetadata>& nodes,
                               const std::vector<BufferMetadata>& buffers,
@@ -937,6 +1035,52 @@ Status GetTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>* type
   return TypeFromFlatbuffer(tensor->type_type(), tensor->type(), {}, type);
 }
 
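The COO writer registers a single index buffer while the CSR writer registers two. The following standalone illustration (not part of the patch) shows why, using a small 2x3 matrix:

    #include <cstdint>

    // The 2x3 matrix
    //
    //   1 0 2
    //   0 0 3
    //
    // has three non-zeros, which the two index formats encode as:

    // COO: one buffer holding a non_zero_length x ndim matrix of coordinates.
    const int64_t coo_indices[3][2] = {{0, 0}, {0, 2}, {1, 2}};  // (row, col)
    const int64_t coo_values[3] = {1, 2, 3};

    // CSR: two buffers; row i's entries live in indices[indptr[i]..indptr[i+1]).
    const int64_t csr_indptr[3] = {0, 2, 3};    // nrows + 1 entries
    const int64_t csr_indices[3] = {0, 2, 2};   // column of each non-zero
    const int64_t csr_values[3] = {1, 2, 3};

Hence *num_buffers is 1 for COO and 2 for CSR, and the data buffer always follows the index buffers in the message body.

+Status GetSparseTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>* type,
+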
std::vector* shape, + std::vector* dim_names, + int64_t* non_zero_length, + SparseTensorFormat::type* sparse_tensor_format_id) { + auto message = flatbuf::GetMessage(metadata.data()); + if (message->header_type() != flatbuf::MessageHeader_SparseTensor) { + return Status::IOError("Header of flatbuffer-encoded Message is not SparseTensor."); + } + if (message->header() == nullptr) { + return Status::IOError("Header-pointer of flatbuffer-encoded Message is null."); + } + + auto sparse_tensor = reinterpret_cast(message->header()); + int ndim = static_cast(sparse_tensor->shape()->size()); + + for (int i = 0; i < ndim; ++i) { + auto dim = sparse_tensor->shape()->Get(i); + + shape->push_back(dim->size()); + auto fb_name = dim->name(); + if (fb_name == 0) { + dim_names->push_back(""); + } else { + dim_names->push_back(fb_name->str()); + } + } + + *non_zero_length = sparse_tensor->non_zero_length(); + + switch (sparse_tensor->sparseIndex_type()) { + case flatbuf::SparseTensorIndex_SparseTensorIndexCOO: + *sparse_tensor_format_id = SparseTensorFormat::COO; + break; + + case flatbuf::SparseTensorIndex_SparseMatrixIndexCSR: + *sparse_tensor_format_id = SparseTensorFormat::CSR; + break; + + default: + return Status::Invalid("Unrecognized sparse index type"); + } + + return TypeFromFlatbuffer(sparse_tensor->type_type(), sparse_tensor->type(), {}, type); +} + // ---------------------------------------------------------------------- // Implement message writing diff --git a/cpp/src/arrow/ipc/metadata-internal.h b/cpp/src/arrow/ipc/metadata-internal.h index 152ca1367ec0e..6562382b878e6 100644 --- a/cpp/src/arrow/ipc/metadata-internal.h +++ b/cpp/src/arrow/ipc/metadata-internal.h @@ -33,6 +33,7 @@ #include "arrow/ipc/dictionary.h" // IYWU pragma: keep #include "arrow/ipc/message.h" #include "arrow/memory_pool.h" +#include "arrow/sparse_tensor.h" #include "arrow/status.h" namespace arrow { @@ -40,6 +41,7 @@ namespace arrow { class DataType; class Schema; class Tensor; +class SparseTensor; namespace flatbuf = org::apache::arrow::flatbuf; @@ -103,6 +105,12 @@ Status GetTensorMetadata(const Buffer& metadata, std::shared_ptr* type std::vector* shape, std::vector* strides, std::vector* dim_names); +// EXPERIMENTAL: Extracting metadata of a sparse tensor from the message +Status GetSparseTensorMetadata(const Buffer& metadata, std::shared_ptr* type, + std::vector* shape, + std::vector* dim_names, int64_t* length, + SparseTensorFormat::type* sparse_tensor_format_id); + /// Write a serialized message metadata with a length-prefix and padding to an /// 8-byte offset. 
Does not make assumptions about whether the stream is
/// aligned already
@@ -137,6 +145,10 @@ Status WriteRecordBatchMessage(const int64_t length, const int64_t body_length,
 Status WriteTensorMessage(const Tensor& tensor, const int64_t buffer_start_offset,
                           std::shared_ptr<Buffer>* out);
 
+Status WriteSparseTensorMessage(const SparseTensor& sparse_tensor, int64_t body_length,
+                                const std::vector<BufferMetadata>& buffers,
+                                std::shared_ptr<Buffer>* out);
+
 Status WriteFileFooter(const Schema& schema, const std::vector<FileBlock>& dictionaries,
                        const std::vector<FileBlock>& record_batches,
                        DictionaryMemo* dictionary_memo, io::OutputStream* out);
diff --git a/cpp/src/arrow/ipc/ipc-read-write-benchmark.cc b/cpp/src/arrow/ipc/read-write-benchmark.cc
similarity index 100%
rename from cpp/src/arrow/ipc/ipc-read-write-benchmark.cc
rename to cpp/src/arrow/ipc/read-write-benchmark.cc
diff --git a/cpp/src/arrow/ipc/ipc-read-write-test.cc b/cpp/src/arrow/ipc/read-write-test.cc
similarity index 86%
rename from cpp/src/arrow/ipc/ipc-read-write-test.cc
rename to cpp/src/arrow/ipc/read-write-test.cc
index 31a9d474fe98a..bc27386f34f30 100644
--- a/cpp/src/arrow/ipc/ipc-read-write-test.cc
+++ b/cpp/src/arrow/ipc/read-write-test.cc
@@ -38,6 +38,7 @@
 #include "arrow/ipc/writer.h"
 #include "arrow/memory_pool.h"
 #include "arrow/record_batch.h"
+#include "arrow/sparse_tensor.h"
 #include "arrow/status.h"
 #include "arrow/tensor.h"
 #include "arrow/test-util.h"
@@ -657,16 +658,7 @@ class TestStreamFormat : public ::testing::TestWithParam {
     std::shared_ptr<RecordBatchReader> reader;
     RETURN_NOT_OK(RecordBatchStreamReader::Open(&buf_reader, &reader));
-
-    std::shared_ptr<RecordBatch> chunk;
-    while (true) {
-      RETURN_NOT_OK(reader->ReadNext(&chunk));
-      if (chunk == nullptr) {
-        break;
-      }
-      out_batches->emplace_back(chunk);
-    }
-    return Status::OK();
+    return reader->ReadAll(out_batches);
   }
 
  protected:
@@ -853,6 +845,117 @@ TEST_F(TestTensorRoundTrip, NonContiguous) {
   CheckTensorRoundTrip(tensor);
 }
 
+class TestSparseTensorRoundTrip : public ::testing::Test, public IpcTestFixture {
+ public:
+  void SetUp() { pool_ = default_memory_pool(); }
+  void TearDown() { io::MemoryMapFixture::TearDown(); }
+
+  template <typename SparseIndexType>
+  void CheckSparseTensorRoundTrip(const SparseTensorImpl<SparseIndexType>& tensor) {
+    GTEST_FAIL();
+  }
+};
+
+template <>
+void TestSparseTensorRoundTrip::CheckSparseTensorRoundTrip(
+    const SparseTensorImpl<SparseCOOIndex>& tensor) {
+  const auto& type = checked_cast<const FixedWidthType&>(*tensor.type());
+  const int elem_size = type.bit_width() / 8;
+
+  int32_t metadata_length;
+  int64_t body_length;
+
+  ASSERT_OK(mmap_->Seek(0));
+
+  ASSERT_OK(WriteSparseTensor(tensor, mmap_.get(), &metadata_length, &body_length,
+                              default_memory_pool()));
+
+  const auto& sparse_index = checked_cast<const SparseCOOIndex&>(*tensor.sparse_index());
+  const int64_t indices_length = elem_size * sparse_index.indices()->size();
+  const int64_t data_length = elem_size * tensor.non_zero_length();
+  const int64_t expected_body_length = indices_length + data_length;
+  ASSERT_EQ(expected_body_length, body_length);
+
+  ASSERT_OK(mmap_->Seek(0));
+
+  std::shared_ptr<SparseTensor> result;
+  ASSERT_OK(ReadSparseTensor(mmap_.get(), &result));
+
+  const auto& resulted_sparse_index =
+      checked_cast<const SparseCOOIndex&>(*result->sparse_index());
+  ASSERT_EQ(resulted_sparse_index.indices()->data()->size(), indices_length);
+  ASSERT_EQ(result->data()->size(), data_length);
+  ASSERT_TRUE(result->Equals(tensor));
+}
+
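The expected_body_length check works because SparseTensorSerializer pads every body buffer to an 8-byte boundary. For the WithSparseCOOIndex fixture below (shape {2, 3, 4}, 12 of the 24 int64 values non-zero), the sizes come out as in this illustrative arithmetic:

    #include <cstdint>

    const int elem_size = 8;   // int64
    const int64_t nnz = 12;    // non-zero values in the fixture
    const int64_t ndim = 3;
    // The COO index is an nnz x ndim tensor of coordinates:
    const int64_t indices_length = elem_size * nnz * ndim;  // 288 bytes
    const int64_t data_length = elem_size * nnz;            // 96 bytes
    // Both are already multiples of 8, so no padding is added and
    // expected_body_length = 288 + 96 = 384.

+template <>
+void TestSparseTensorRoundTrip::CheckSparseTensorRoundTrip(
+    const SparseTensorImpl<SparseCSRIndex>& tensor) {
+  const auto& type = checked_cast<const FixedWidthType&>(*tensor.type());
+  const int elem_size = type.bit_width() / 8;
+
+  int32_t metadata_length;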
+  int64_t body_length;
+
+  ASSERT_OK(mmap_->Seek(0));
+
+  ASSERT_OK(WriteSparseTensor(tensor, mmap_.get(), &metadata_length, &body_length,
+                              default_memory_pool()));
+
+  const auto& sparse_index = checked_cast<const SparseCSRIndex&>(*tensor.sparse_index());
+  const int64_t indptr_length = elem_size * sparse_index.indptr()->size();
+  const int64_t indices_length = elem_size * sparse_index.indices()->size();
+  const int64_t data_length = elem_size * tensor.non_zero_length();
+  const int64_t expected_body_length = indptr_length + indices_length + data_length;
+  ASSERT_EQ(expected_body_length, body_length);
+
+  ASSERT_OK(mmap_->Seek(0));
+
+  std::shared_ptr<SparseTensor> result;
+  ASSERT_OK(ReadSparseTensor(mmap_.get(), &result));
+
+  const auto& resulted_sparse_index =
+      checked_cast<const SparseCSRIndex&>(*result->sparse_index());
+  ASSERT_EQ(resulted_sparse_index.indptr()->data()->size(), indptr_length);
+  ASSERT_EQ(resulted_sparse_index.indices()->data()->size(), indices_length);
+  ASSERT_EQ(result->data()->size(), data_length);
+  ASSERT_TRUE(result->Equals(tensor));
+}
+
+TEST_F(TestSparseTensorRoundTrip, WithSparseCOOIndex) {
+  std::string path = "test-write-sparse-coo-tensor";
+  constexpr int64_t kBufferSize = 1 << 20;
+  ASSERT_OK(io::MemoryMapFixture::InitMemoryMap(kBufferSize, path, &mmap_));
+
+  std::vector<int64_t> shape = {2, 3, 4};
+  std::vector<std::string> dim_names = {"foo", "bar", "baz"};
+  std::vector<int64_t> values = {1, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0,
+                                 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16};
+
+  auto data = Buffer::Wrap(values);
+  NumericTensor<Int64Type> t(data, shape, {}, dim_names);
+  SparseTensorImpl<SparseCOOIndex> st(t);
+
+  CheckSparseTensorRoundTrip(st);
+}
+
+TEST_F(TestSparseTensorRoundTrip, WithSparseCSRIndex) {
+  std::string path = "test-write-sparse-csr-matrix";
+  constexpr int64_t kBufferSize = 1 << 20;
+  ASSERT_OK(io::MemoryMapFixture::InitMemoryMap(kBufferSize, path, &mmap_));
+
+  std::vector<int64_t> shape = {4, 6};
+  std::vector<std::string> dim_names = {"foo", "bar"};
+  std::vector<int64_t> values = {1, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0,
+                                 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16};
+
+  auto data = Buffer::Wrap(values);
+  NumericTensor<Int64Type> t(data, shape, {}, dim_names);
+  SparseTensorImpl<SparseCSRIndex> st(t);
+
+  CheckSparseTensorRoundTrip(st);
+}
+
 TEST(TestRecordBatchStreamReader, MalformedInput) {
   const std::string empty_str = "";
   const std::string garbage_str = "12345678";
diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index 65f5d963e88db..1f04fad81743c 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -38,6 +38,7 @@
 #include "arrow/ipc/message.h"
 #include "arrow/ipc/metadata-internal.h"
 #include "arrow/record_batch.h"
+#include "arrow/sparse_tensor.h"
 #include "arrow/status.h"
 #include "arrow/tensor.h"
 #include "arrow/type.h"
@@ -225,9 +226,7 @@ class ArrayLoader {
     const int num_children = type.num_children();
 
     if (num_children != 1) {
-      std::stringstream ss;
-      ss << "Wrong number of children: " << num_children;
-      return Status::Invalid(ss.str());
+      return Status::Invalid("Wrong number of children: ", num_children);
     }
 
     return LoadChildren(type.children());
@@ -343,9 +342,7 @@ Status ReadDictionary(const Buffer& metadata, const DictionaryTypeMap& dictionar
   int64_t id = *dictionary_id = dictionary_batch->id();
   auto it = dictionary_types.find(id);
   if (it == dictionary_types.end()) {
-    std::stringstream ss;
-    ss << "Do not have type metadata for dictionary with id: " << id;
-    return Status::KeyError(ss.str());
+    return Status::KeyError("Do not have type metadata for dictionary with id: ", id);
   }
 
   std::vector<std::shared_ptr<Field>> fields = {it->second};
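The reader.cc hunks here continue a cleanup that runs through this patch: error formatting via std::stringstream is replaced with the variadic Status factories, which stringify and concatenate their arguments internally. Side by side, as an illustration:

    #include <sstream>
    #include "arrow/status.h"

    // Before: build the message by hand, then wrap the string.
    arrow::Status CheckChildrenOld(int num_children) {
      if (num_children != 1) {
        std::stringstream ss;
        ss << "Wrong number of children: " << num_children;
        return arrow::Status::Invalid(ss.str());
      }
      return arrow::Status::OK();
    }

    // After: the factory joins the arguments itself, one line per error site.
    arrow::Status CheckChildrenNew(int num_children) {
      if (num_children != 1) {
        return arrow::Status::Invalid("Wrong number of children: ", num_children);
      }
      return arrow::Status::OK();
    }

@@ -372,10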
+369,8 @@ static Status ReadMessageAndValidate(MessageReader* reader, Message::Type expect RETURN_NOT_OK(reader->ReadNextMessage(message)); if (!(*message) && !allow_null) { - std::stringstream ss; - ss << "Expected " << FormatMessageType(expected_type) - << " message in stream, was null or length 0"; - return Status::Invalid(ss.str()); + return Status::Invalid("Expected ", FormatMessageType(expected_type), + " message in stream, was null or length 0"); } if ((*message) == nullptr) { @@ -383,10 +378,9 @@ static Status ReadMessageAndValidate(MessageReader* reader, Message::Type expect } if ((*message)->type() != expected_type) { - std::stringstream ss; - ss << "Message not expected type: " << FormatMessageType(expected_type) - << ", was: " << (*message)->type(); - return Status::IOError(ss.str()); + return Status::IOError( + "Message not expected type: ", FormatMessageType(expected_type), + ", was: ", (*message)->type()); } return Status::OK(); } @@ -506,15 +500,15 @@ Status RecordBatchStreamReader::ReadNext(std::shared_ptr* batch) { class RecordBatchFileReader::RecordBatchFileReaderImpl { public: - RecordBatchFileReaderImpl() { dictionary_memo_ = std::make_shared(); } + RecordBatchFileReaderImpl() : file_(NULLPTR), footer_offset_(0), footer_(NULLPTR) { + dictionary_memo_ = std::make_shared(); + } Status ReadFooter() { int magic_size = static_cast(strlen(kArrowMagicBytes)); if (footer_offset_ <= magic_size * 2 + 4) { - std::stringstream ss; - ss << "File is too small: " << footer_offset_; - return Status::Invalid(ss.str()); + return Status::Invalid("File is too small: ", footer_offset_); } std::shared_ptr buffer; @@ -523,9 +517,7 @@ class RecordBatchFileReader::RecordBatchFileReaderImpl { const int64_t expected_footer_size = magic_size + sizeof(int32_t); if (buffer->size() < expected_footer_size) { - std::stringstream ss; - ss << "Unable to read " << expected_footer_size << "from end of file"; - return Status::Invalid(ss.str()); + return Status::Invalid("Unable to read ", expected_footer_size, "from end of file"); } if (memcmp(buffer->data() + sizeof(int32_t), kArrowMagicBytes, magic_size)) { @@ -709,6 +701,12 @@ Status ReadSchema(io::InputStream* stream, std::shared_ptr* out) { return Status::OK(); } +Status ReadSchema(const Message& message, std::shared_ptr* out) { + std::shared_ptr reader; + DictionaryMemo dictionary_memo; + return internal::GetSchema(message.header(), dictionary_memo, &*out); +} + Status ReadRecordBatch(const std::shared_ptr& schema, io::InputStream* file, std::shared_ptr* out) { std::unique_ptr message; @@ -735,5 +733,123 @@ Status ReadTensor(const Message& message, std::shared_ptr* out) { return Status::OK(); } +namespace { + +Status ReadSparseCOOIndex(const flatbuf::SparseTensor* sparse_tensor, int64_t ndim, + int64_t non_zero_length, io::RandomAccessFile* file, + std::shared_ptr* out) { + auto* sparse_index = sparse_tensor->sparseIndex_as_SparseTensorIndexCOO(); + auto* indices_buffer = sparse_index->indicesBuffer(); + std::shared_ptr indices_data; + RETURN_NOT_OK( + file->ReadAt(indices_buffer->offset(), indices_buffer->length(), &indices_data)); + std::vector shape({non_zero_length, ndim}); + const int64_t elsize = sizeof(int64_t); + std::vector strides({elsize, elsize * non_zero_length}); + *out = std::make_shared( + std::make_shared(indices_data, shape, strides)); + return Status::OK(); +} + +Status ReadSparseCSRIndex(const flatbuf::SparseTensor* sparse_tensor, int64_t ndim, + int64_t non_zero_length, io::RandomAccessFile* file, + std::shared_ptr* out) { + auto* 
sparse_index = sparse_tensor->sparseIndex_as_SparseMatrixIndexCSR(); + + auto* indptr_buffer = sparse_index->indptrBuffer(); + std::shared_ptr indptr_data; + RETURN_NOT_OK( + file->ReadAt(indptr_buffer->offset(), indptr_buffer->length(), &indptr_data)); + + auto* indices_buffer = sparse_index->indicesBuffer(); + std::shared_ptr indices_data; + RETURN_NOT_OK( + file->ReadAt(indices_buffer->offset(), indices_buffer->length(), &indices_data)); + + std::vector indptr_shape({ndim + 1}); + std::vector indices_shape({non_zero_length}); + *out = std::make_shared( + std::make_shared(indptr_data, indptr_shape), + std::make_shared(indices_data, indices_shape)); + return Status::OK(); +} + +Status MakeSparseTensorWithSparseCOOIndex( + const std::shared_ptr& type, const std::vector& shape, + const std::vector& dim_names, + const std::shared_ptr& sparse_index, int64_t non_zero_length, + const std::shared_ptr& data, std::shared_ptr* out) { + *out = std::make_shared>(sparse_index, type, data, + shape, dim_names); + return Status::OK(); +} + +Status MakeSparseTensorWithSparseCSRIndex( + const std::shared_ptr& type, const std::vector& shape, + const std::vector& dim_names, + const std::shared_ptr& sparse_index, int64_t non_zero_length, + const std::shared_ptr& data, std::shared_ptr* out) { + *out = std::make_shared>(sparse_index, type, data, + shape, dim_names); + return Status::OK(); +} + +} // namespace + +Status ReadSparseTensor(const Buffer& metadata, io::RandomAccessFile* file, + std::shared_ptr* out) { + std::shared_ptr type; + std::vector shape; + std::vector dim_names; + int64_t non_zero_length; + SparseTensorFormat::type sparse_tensor_format_id; + + RETURN_NOT_OK(internal::GetSparseTensorMetadata( + metadata, &type, &shape, &dim_names, &non_zero_length, &sparse_tensor_format_id)); + + auto message = flatbuf::GetMessage(metadata.data()); + auto sparse_tensor = reinterpret_cast(message->header()); + const flatbuf::Buffer* buffer = sparse_tensor->data(); + DCHECK(BitUtil::IsMultipleOf8(buffer->offset())) + << "Buffer of sparse index data " + << "did not start on 8-byte aligned offset: " << buffer->offset(); + + std::shared_ptr data; + RETURN_NOT_OK(file->ReadAt(buffer->offset(), buffer->length(), &data)); + + std::shared_ptr sparse_index; + switch (sparse_tensor_format_id) { + case SparseTensorFormat::COO: + RETURN_NOT_OK(ReadSparseCOOIndex(sparse_tensor, shape.size(), non_zero_length, file, + &sparse_index)); + return MakeSparseTensorWithSparseCOOIndex( + type, shape, dim_names, std::dynamic_pointer_cast(sparse_index), + non_zero_length, data, out); + + case SparseTensorFormat::CSR: + RETURN_NOT_OK(ReadSparseCSRIndex(sparse_tensor, shape.size(), non_zero_length, file, + &sparse_index)); + return MakeSparseTensorWithSparseCSRIndex( + type, shape, dim_names, std::dynamic_pointer_cast(sparse_index), + non_zero_length, data, out); + + default: + return Status::Invalid("Unsupported sparse index format"); + } +} + +Status ReadSparseTensor(const Message& message, std::shared_ptr* out) { + io::BufferReader buffer_reader(message.body()); + return ReadSparseTensor(*message.metadata(), &buffer_reader, out); +} + +Status ReadSparseTensor(io::InputStream* file, std::shared_ptr* out) { + std::unique_ptr message; + RETURN_NOT_OK(ReadContiguousPayload(file, &message)); + DCHECK_EQ(message->type(), Message::SPARSE_TENSOR); + io::BufferReader buffer_reader(message->body()); + return ReadSparseTensor(*message->metadata(), &buffer_reader, out); +} + } // namespace ipc } // namespace arrow diff --git 
a/cpp/src/arrow/ipc/reader.h b/cpp/src/arrow/ipc/reader.h
index 942664d6f2269..641de3eaf7b41 100644
--- a/cpp/src/arrow/ipc/reader.h
+++ b/cpp/src/arrow/ipc/reader.h
@@ -33,6 +33,7 @@ class Buffer;
 class Schema;
 class Status;
 class Tensor;
+class SparseTensor;
 
 namespace io {
 
@@ -174,6 +175,14 @@ class ARROW_EXPORT RecordBatchFileReader {
 ARROW_EXPORT
 Status ReadSchema(io::InputStream* stream, std::shared_ptr<Schema>* out);
 
+/// \brief Read Schema from encapsulated Message
+///
+/// \param[in] message a message instance containing metadata
+/// \param[out] out the resulting Schema
+/// \return Status
+ARROW_EXPORT
+Status ReadSchema(const Message& message, std::shared_ptr<Schema>* out);
+
 /// Read record batch as encapsulated IPC message with metadata size prefix and
 /// header
 ///
@@ -235,6 +244,22 @@ Status ReadTensor(io::InputStream* file, std::shared_ptr<Tensor>* out);
 ARROW_EXPORT
 Status ReadTensor(const Message& message, std::shared_ptr<Tensor>* out);
 
+/// \brief EXPERIMENTAL: Read arrow::SparseTensor as encapsulated IPC message in file
+///
+/// \param[in] file an InputStream pointed at the start of the message
+/// \param[out] out the read sparse tensor
+/// \return Status
+ARROW_EXPORT
+Status ReadSparseTensor(io::InputStream* file, std::shared_ptr<SparseTensor>* out);
+
+/// \brief EXPERIMENTAL: Read arrow::SparseTensor from IPC message
+///
+/// \param[in] message a Message containing the tensor metadata and body
+/// \param[out] out the read sparse tensor
+/// \return Status
+ARROW_EXPORT
+Status ReadSparseTensor(const Message& message, std::shared_ptr<SparseTensor>* out);
+
 }  // namespace ipc
 }  // namespace arrow
diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc
index 8225cce7b8131..1eb91998b5a93 100644
--- a/cpp/src/arrow/ipc/writer.cc
+++ b/cpp/src/arrow/ipc/writer.cc
@@ -21,6 +21,7 @@
 #include
 #include
 #include
+#include
 #include
 
 #include "arrow/array.h"
@@ -33,6 +34,7 @@
 #include "arrow/ipc/util.h"
 #include "arrow/memory_pool.h"
 #include "arrow/record_batch.h"
+#include "arrow/sparse_tensor.h"
 #include "arrow/status.h"
 #include "arrow/table.h"
 #include "arrow/tensor.h"
@@ -522,6 +524,15 @@ Status WriteIpcPayload(const IpcPayload& payload, io::OutputStream* dst,
   return Status::OK();
 }
 
+Status GetSchemaPayload(const Schema& schema, MemoryPool* pool,
+                        DictionaryMemo* dictionary_memo, IpcPayload* out) {
+  out->type = Message::Type::SCHEMA;
+  out->body_buffers.clear();
+  out->body_length = 0;
+  RETURN_NOT_OK(SerializeSchema(schema, pool, &out->metadata));
+  return WriteSchemaMessage(schema, dictionary_memo, &out->metadata);
+}
+
 Status GetRecordBatchPayload(const RecordBatch& batch, MemoryPool* pool,
                              IpcPayload* out) {
   RecordBatchSerializer writer(pool, 0, kMaxNestingDepth, true, out);
@@ -671,6 +682,105 @@ Status GetTensorMessage(const Tensor& tensor, MemoryPool* pool,
   return Status::OK();
 }
 
+namespace internal {
+
+class SparseTensorSerializer {
+ public:
+  SparseTensorSerializer(int64_t buffer_start_offset, IpcPayload* out)
+      : out_(out), buffer_start_offset_(buffer_start_offset) {}
+
+  ~SparseTensorSerializer() = default;
+
+  Status VisitSparseIndex(const SparseIndex& sparse_index) {
+    switch (sparse_index.format_id()) {
+      case SparseTensorFormat::COO:
+        RETURN_NOT_OK(
+            VisitSparseCOOIndex(checked_cast<const SparseCOOIndex&>(sparse_index)));
+        break;
+
+      case SparseTensorFormat::CSR:
+        RETURN_NOT_OK(
+            VisitSparseCSRIndex(checked_cast<const SparseCSRIndex&>(sparse_index)));
+        break;
+
+      default:
+        return Status::NotImplemented("Unsupported sparse index format: ",
+                                      sparse_index.ToString());
+    }
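Like the record batch and tensor serializers before it, SparseTensorSerializer produces an internal::IpcPayload: the encapsulated metadata flatbuffer plus the list of body buffers at their computed offsets. A rough sketch of the staging pattern follows; internal:: APIs are unstable, and the exact WriteIpcPayload signature is an assumption here:

    #include "arrow/io/interfaces.h"
    #include "arrow/ipc/writer.h"
    #include "arrow/memory_pool.h"
    #include "arrow/record_batch.h"

    // Stage a record batch as an IpcPayload, then write it to a stream.
    arrow::Status WriteOnePayload(const arrow::RecordBatch& batch,
                                  arrow::io::OutputStream* sink) {
      arrow::ipc::internal::IpcPayload payload;
      ARROW_RETURN_NOT_OK(arrow::ipc::internal::GetRecordBatchPayload(
          batch, arrow::default_memory_pool(), &payload));
      // metadata_length reports the length-prefixed flatbuffer size, padded.
      int32_t metadata_length = 0;
      return arrow::ipc::internal::WriteIpcPayload(payload, sink, &metadata_length);
    }

+
+    return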
Status::OK(); + } + + Status SerializeMetadata(const SparseTensor& sparse_tensor) { + return WriteSparseTensorMessage(sparse_tensor, out_->body_length, buffer_meta_, + &out_->metadata); + } + + Status Assemble(const SparseTensor& sparse_tensor) { + if (buffer_meta_.size() > 0) { + buffer_meta_.clear(); + out_->body_buffers.clear(); + } + + RETURN_NOT_OK(VisitSparseIndex(*sparse_tensor.sparse_index())); + out_->body_buffers.emplace_back(sparse_tensor.data()); + + int64_t offset = buffer_start_offset_; + buffer_meta_.reserve(out_->body_buffers.size()); + + for (size_t i = 0; i < out_->body_buffers.size(); ++i) { + const Buffer* buffer = out_->body_buffers[i].get(); + int64_t size = buffer->size(); + int64_t padding = BitUtil::RoundUpToMultipleOf8(size) - size; + buffer_meta_.push_back({offset, size + padding}); + offset += size + padding; + } + + out_->body_length = offset - buffer_start_offset_; + DCHECK(BitUtil::IsMultipleOf8(out_->body_length)); + + return SerializeMetadata(sparse_tensor); + } + + private: + Status VisitSparseCOOIndex(const SparseCOOIndex& sparse_index) { + out_->body_buffers.emplace_back(sparse_index.indices()->data()); + return Status::OK(); + } + + Status VisitSparseCSRIndex(const SparseCSRIndex& sparse_index) { + out_->body_buffers.emplace_back(sparse_index.indptr()->data()); + out_->body_buffers.emplace_back(sparse_index.indices()->data()); + return Status::OK(); + } + + IpcPayload* out_; + + std::vector buffer_meta_; + + int64_t buffer_start_offset_; +}; + +Status GetSparseTensorPayload(const SparseTensor& sparse_tensor, MemoryPool* pool, + IpcPayload* out) { + SparseTensorSerializer writer(0, out); + return writer.Assemble(sparse_tensor); +} + +} // namespace internal + +Status WriteSparseTensor(const SparseTensor& sparse_tensor, io::OutputStream* dst, + int32_t* metadata_length, int64_t* body_length, + MemoryPool* pool) { + internal::IpcPayload payload; + internal::SparseTensorSerializer writer(0, &payload); + RETURN_NOT_OK(writer.Assemble(sparse_tensor)); + + *body_length = payload.body_length; + return internal::WriteIpcPayload(payload, dst, metadata_length); +} + Status WriteDictionary(int64_t dictionary_id, const std::shared_ptr& dictionary, int64_t buffer_start_offset, io::OutputStream* dst, int32_t* metadata_length, int64_t* body_length, MemoryPool* pool) { @@ -772,7 +882,10 @@ class SchemaWriter : public StreamBookKeeper { public: SchemaWriter(const Schema& schema, DictionaryMemo* dictionary_memo, MemoryPool* pool, io::OutputStream* sink) - : StreamBookKeeper(sink), schema_(schema), dictionary_memo_(dictionary_memo) {} + : StreamBookKeeper(sink), + pool_(pool), + schema_(schema), + dictionary_memo_(dictionary_memo) {} Status WriteSchema() { #ifndef NDEBUG @@ -949,6 +1062,10 @@ class RecordBatchFileWriter::RecordBatchFileWriterImpl } Status Close() override { + // Write the schema if not already written + // User is responsible for closing the OutputStream + RETURN_NOT_OK(CheckStarted()); + // Write metadata RETURN_NOT_OK(UpdatePosition()); diff --git a/cpp/src/arrow/ipc/writer.h b/cpp/src/arrow/ipc/writer.h index a1c711146efe8..5b099d59c0ef0 100644 --- a/cpp/src/arrow/ipc/writer.h +++ b/cpp/src/arrow/ipc/writer.h @@ -30,12 +30,14 @@ namespace arrow { class Buffer; +class DictionaryMemo; class MemoryPool; class RecordBatch; class Schema; class Status; class Table; class Tensor; +class SparseTensor; namespace io { @@ -53,7 +55,9 @@ class ARROW_EXPORT RecordBatchWriter { /// \brief Write a record batch to the stream /// - /// \param allow_64bit boolean 
permitting field lengths exceeding INT32_MAX
+  /// \param[in] batch the record batch to write to the stream
+  /// \param[in] allow_64bit if true, allow field lengths that don't fit
+  /// in a signed 32-bit int
   /// \return Status
   virtual Status WriteRecordBatch(const RecordBatch& batch, bool allow_64bit = false) = 0;
 
@@ -160,6 +164,7 @@ class ARROW_EXPORT RecordBatchFileWriter : public RecordBatchStreamWriter {
 /// \param[out] metadata_length the size of the length-prefixed flatbuffer
 ///     including padding to a 64-byte boundary
 /// \param[out] body_length the size of the contiguous buffer block plus
+/// \param[in] pool the memory pool to allocate memory from
 /// \param[in] max_recursion_depth the maximum permitted nesting schema depth
 /// \param[in] allow_64bit permit field lengths exceeding INT32_MAX. May not be
 ///     readable by other Arrow implementations
@@ -172,7 +177,9 @@ class ARROW_EXPORT RecordBatchFileWriter : public RecordBatchStreamWriter {
 /// prefixed by its size, followed by each of the memory buffers in the batch
 /// written end to end (with appropriate alignment and padding):
 ///
-/// <int32: metadata size> <uint8*: metadata> <buffers>
+/// \code
+/// <int32: metadata size> <uint8*: metadata> <buffers>
+/// \endcode
 ///
 /// Finally, the absolute offsets (relative to the start of the output stream)
 /// to the end of the body and end of the metadata / data header (suffixed by
@@ -253,22 +260,40 @@ ARROW_EXPORT
 Status GetTensorMessage(const Tensor& tensor, MemoryPool* pool,
                         std::unique_ptr<Message>* out);
 
-/// \brief Write arrow::Tensor as a contiguous message. The metadata and body
-/// are written assuming 64-byte alignment. It is the user's responsibility to
-/// ensure that the OutputStream has been aligned to a 64-byte multiple before
-/// writing the message.
+/// \brief Write arrow::Tensor as a contiguous message.
+///
+/// The metadata and body are written assuming 64-byte alignment. It is the
+/// user's responsibility to ensure that the OutputStream has been aligned
+/// to a 64-byte multiple before writing the message.
+///
+/// The message is written out as follows:
+/// \code
+/// <metadata size> <metadata> <tensor data>
+/// \endcode
 ///
 /// \param[in] tensor the Tensor to write
 /// \param[in] dst the OutputStream to write to
 /// \param[out] metadata_length the actual metadata length, including padding
 /// \param[out] body_length the actual message body length
 /// \return Status
-///
-///
 ARROW_EXPORT
 Status WriteTensor(const Tensor& tensor, io::OutputStream* dst, int32_t* metadata_length,
                    int64_t* body_length);
 
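Both WriteTensor and the new WriteSparseTensor below leave stream alignment to the caller. A sketch of what that padding step can look like; nothing in the patch provides this helper, and the io::OutputStream Tell/Write calls are used under their current signatures:

    #include <cstdint>
    #include <vector>
    #include "arrow/io/interfaces.h"
    #include "arrow/status.h"

    // Pad an output stream to the next 64-byte boundary before writing a
    // tensor or sparse tensor message.
    arrow::Status AlignTo64(arrow::io::OutputStream* sink) {
      int64_t position = 0;
      ARROW_RETURN_NOT_OK(sink->Tell(&position));
      const int64_t remainder = position % 64;
      if (remainder == 0) {
        return arrow::Status::OK();
      }
      const std::vector<uint8_t> padding(static_cast<size_t>(64 - remainder), 0);
      return sink->Write(padding.data(), static_cast<int64_t>(padding.size()));
    }

+// \brief EXPERIMENTAL: Write arrow::SparseTensor as a contiguous message. The metadata,
+// sparse index, and body are written assuming 64-byte alignment. It is the
+// user's responsibility to ensure that the OutputStream has been aligned
+// to a 64-byte multiple before writing the message.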
+// +// \param[in] tensor the SparseTensor to write +// \param[in] dst the OutputStream to write to +// \param[out] metadata_length the actual metadata length, including padding +// \param[out] body_length the actual message body length +ARROW_EXPORT +Status WriteSparseTensor(const SparseTensor& sparse_tensor, io::OutputStream* dst, + int32_t* metadata_length, int64_t* body_length, + MemoryPool* pool); + namespace internal { // These internal APIs may change without warning or deprecation @@ -289,6 +314,17 @@ ARROW_EXPORT Status GetDictionaryPayloads(const Schema& schema, std::vector>* out); +/// \brief Compute IpcPayload for the given schema +/// \param[in] schema the Schema that is being serialized +/// \param[in,out] pool for any required temporary memory allocations +/// \param[in,out] dictionary_memo class for tracking dictionaries and assigning +/// dictionary ids +/// \param[out] out the returned IpcPayload +/// \return Status +ARROW_EXPORT +Status GetSchemaPayload(const Schema& schema, MemoryPool* pool, + DictionaryMemo* dictionary_memo, IpcPayload* out); + /// \brief Compute IpcPayload for the given record batch /// \param[in] batch the RecordBatch that is being serialized /// \param[in,out] pool for any required temporary memory allocations diff --git a/cpp/src/arrow/memory_pool-test.h b/cpp/src/arrow/memory_pool-test.h index 34523a181ba1e..fc86d943ec116 100644 --- a/cpp/src/arrow/memory_pool-test.h +++ b/cpp/src/arrow/memory_pool-test.h @@ -16,6 +16,7 @@ // under the License. #include +#include #include #include diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc index 0a27141b447f7..103771bf527a7 100644 --- a/cpp/src/arrow/memory_pool.cc +++ b/cpp/src/arrow/memory_pool.cc @@ -17,24 +17,22 @@ #include "arrow/memory_pool.h" -#include -#include -#include -#include -#include -#include +#include // IWYU pragma: keep +#include // IWYU pragma: keep +#include // IWYU pragma: keep +#include // IWYU pragma: keep #include #include #include // IWYU pragma: keep #include "arrow/status.h" -#include "arrow/util/logging.h" +#include "arrow/util/logging.h" // IWYU pragma: keep #ifdef ARROW_JEMALLOC // Needed to support jemalloc 3 and 4 #define JEMALLOC_MANGLE // Explicitly link to our version of jemalloc -#include "jemalloc_ep/dist/include/jemalloc/jemalloc.h" +#include "arrow_thirdparty/include/jemalloc/jemalloc.h" #endif namespace arrow { @@ -42,6 +40,11 @@ namespace arrow { constexpr size_t kAlignment = 64; namespace { + +// A static piece of memory for 0-size allocations, so as to return +// an aligned non-null pointer. 
+alignas(kAlignment) static uint8_t zero_size_area[1]; + // Allocate memory according to the alignment requirements for Arrow // (as of May 2016 64 bytes) Status AllocateAligned(int64_t size, uint8_t** out) { @@ -49,6 +52,10 @@ Status AllocateAligned(int64_t size, uint8_t** out) { if (size < 0) { return Status::Invalid("negative malloc size"); } + if (size == 0) { + *out = zero_size_area; + return Status::OK(); + } if (static_cast(size) >= std::numeric_limits::max()) { return Status::CapacityError("malloc size overflows size_t"); } @@ -57,35 +64,86 @@ Status AllocateAligned(int64_t size, uint8_t** out) { *out = reinterpret_cast(_aligned_malloc(static_cast(size), kAlignment)); if (!*out) { - std::stringstream ss; - ss << "malloc of size " << size << " failed"; - return Status::OutOfMemory(ss.str()); + return Status::OutOfMemory("malloc of size ", size, " failed"); } #elif defined(ARROW_JEMALLOC) - *out = reinterpret_cast(mallocx( - std::max(static_cast(size), kAlignment), MALLOCX_ALIGN(kAlignment))); + *out = reinterpret_cast( + mallocx(static_cast(size), MALLOCX_ALIGN(kAlignment))); if (*out == NULL) { - std::stringstream ss; - ss << "malloc of size " << size << " failed"; - return Status::OutOfMemory(ss.str()); + return Status::OutOfMemory("malloc of size ", size, " failed"); } #else const int result = posix_memalign(reinterpret_cast(out), kAlignment, static_cast(size)); if (result == ENOMEM) { - std::stringstream ss; - ss << "malloc of size " << size << " failed"; - return Status::OutOfMemory(ss.str()); + return Status::OutOfMemory("malloc of size ", size, " failed"); } if (result == EINVAL) { - std::stringstream ss; - ss << "invalid alignment parameter: " << kAlignment; - return Status::Invalid(ss.str()); + return Status::Invalid("invalid alignment parameter: ", kAlignment); } #endif return Status::OK(); } + +void DeallocateAligned(uint8_t* ptr, int64_t size) { + if (ptr == zero_size_area) { + DCHECK_EQ(size, 0); + } else { +#ifdef _WIN32 + _aligned_free(ptr); +#elif defined(ARROW_JEMALLOC) + dallocx(ptr, MALLOCX_ALIGN(kAlignment)); +#else + std::free(ptr); +#endif + } +} + +Status ReallocateAligned(int64_t old_size, int64_t new_size, uint8_t** ptr) { + uint8_t* previous_ptr = *ptr; + if (previous_ptr == zero_size_area) { + DCHECK_EQ(old_size, 0); + return AllocateAligned(new_size, ptr); + } + if (new_size == 0) { + DeallocateAligned(previous_ptr, old_size); + *ptr = zero_size_area; + return Status::OK(); + } +#ifdef ARROW_JEMALLOC + if (new_size < 0) { + return Status::Invalid("negative realloc size"); + } + if (static_cast(new_size) >= std::numeric_limits::max()) { + return Status::CapacityError("realloc overflows size_t"); + } + *ptr = reinterpret_cast( + rallocx(*ptr, static_cast(new_size), MALLOCX_ALIGN(kAlignment))); + if (*ptr == NULL) { + *ptr = previous_ptr; + return Status::OutOfMemory("realloc of size ", new_size, " failed"); + } +#else + // Note: We cannot use realloc() here as it doesn't guarantee alignment. 
+
+  // Allocate new chunk
+  uint8_t* out = nullptr;
+  RETURN_NOT_OK(AllocateAligned(new_size, &out));
+  DCHECK(out);
+  // Copy contents and release old memory chunk
+  memcpy(out, *ptr, static_cast<size_t>(std::min(new_size, old_size)));
+#ifdef _WIN32
+  _aligned_free(*ptr);
+#else
+  std::free(*ptr);
+#endif  // defined(_WIN32)
+  *ptr = out;
+#endif  // defined(ARROW_JEMALLOC)
+
+  return Status::OK();
+}
+
 }  // namespace
 
 MemoryPool::MemoryPool() {}
@@ -109,38 +167,7 @@ class DefaultMemoryPool : public MemoryPool {
   }
 
   Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) override {
-#ifdef ARROW_JEMALLOC
-    uint8_t* previous_ptr = *ptr;
-    if (new_size < 0) {
-      return Status::Invalid("negative realloc size");
-    }
-    if (static_cast<size_t>(new_size) >= std::numeric_limits<size_t>::max()) {
-      return Status::CapacityError("realloc overflows size_t");
-    }
-    *ptr = reinterpret_cast<uint8_t*>(
-        rallocx(*ptr, static_cast<size_t>(new_size), MALLOCX_ALIGN(kAlignment)));
-    if (*ptr == NULL) {
-      std::stringstream ss;
-      ss << "realloc of size " << new_size << " failed";
-      *ptr = previous_ptr;
-      return Status::OutOfMemory(ss.str());
-    }
-#else
-    // Note: We cannot use realloc() here as it doesn't guarantee alignment.
-
-    // Allocate new chunk
-    uint8_t* out = nullptr;
-    RETURN_NOT_OK(AllocateAligned(new_size, &out));
-    DCHECK(out);
-    // Copy contents and release old memory chunk
-    memcpy(out, *ptr, static_cast<size_t>(std::min(new_size, old_size)));
-#ifdef _WIN32
-    _aligned_free(*ptr);
-#else
-    std::free(*ptr);
-#endif  // defined(_MSC_VER)
-    *ptr = out;
-#endif  // defined(ARROW_JEMALLOC)
+    RETURN_NOT_OK(ReallocateAligned(old_size, new_size, ptr));
 
     stats_.UpdateAllocatedBytes(new_size - old_size);
     return Status::OK();
@@ -149,13 +176,8 @@ class DefaultMemoryPool : public MemoryPool {
   int64_t bytes_allocated() const override { return stats_.bytes_allocated(); }
 
   void Free(uint8_t* buffer, int64_t size) override {
-#ifdef _WIN32
-    _aligned_free(buffer);
-#elif defined(ARROW_JEMALLOC)
-    dallocx(buffer, MALLOCX_ALIGN(kAlignment));
-#else
-    std::free(buffer);
-#endif
+    DeallocateAligned(buffer, size);
+
     stats_.UpdateAllocatedBytes(-size);
   }
 
diff --git a/cpp/src/arrow/memory_pool.h b/cpp/src/arrow/memory_pool.h
index 49cd4c7efc3ed..8499b6f35d400 100644
--- a/cpp/src/arrow/memory_pool.h
+++ b/cpp/src/arrow/memory_pool.h
@@ -142,6 +142,7 @@ class ARROW_EXPORT ProxyMemoryPool : public MemoryPool {
   std::unique_ptr<ProxyMemoryPoolImpl> impl_;
 };
 
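With the zero_size_area introduced above, zero-byte allocations now have a well-defined result across all three allocator backends. An illustrative check of the contract, using only the public MemoryPool interface:

    #include <cstdint>
    #include "arrow/memory_pool.h"

    arrow::Status ZeroSizeContract() {
      arrow::MemoryPool* pool = arrow::default_memory_pool();
      uint8_t* ptr = nullptr;
      // Allocate(0) returns a 64-byte-aligned, non-null pointer rather than
      // whatever the underlying malloc(0) would do.
      ARROW_RETURN_NOT_OK(pool->Allocate(0, &ptr));
      // Reallocate and Free both recognize the zero-size pointer.
      ARROW_RETURN_NOT_OK(pool->Reallocate(0, 128, &ptr));
      pool->Free(ptr, 128);
      return arrow::Status::OK();
    }

+/// Return the process-wide default memory pool.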
ARROW_EXPORT MemoryPool* default_memory_pool(); #ifdef ARROW_NO_DEFAULT_MEMORY_POOL diff --git a/cpp/src/arrow/pretty_print-test.cc b/cpp/src/arrow/pretty_print-test.cc index 482bc4370fdca..8696efc735b8a 100644 --- a/cpp/src/arrow/pretty_print-test.cc +++ b/cpp/src/arrow/pretty_print-test.cc @@ -26,12 +26,10 @@ #include "arrow/array.h" #include "arrow/builder.h" -#include "arrow/memory_pool.h" #include "arrow/pretty_print.h" #include "arrow/table.h" #include "arrow/test-util.h" #include "arrow/type.h" -#include "arrow/util/decimal.h" namespace arrow { @@ -163,16 +161,7 @@ TEST_F(TestPrettyPrint, StructTypeBasic) { auto simple_2 = field("two", int32()); auto simple_struct = struct_({simple_1, simple_2}); - auto int_builder_1 = std::make_shared(); - auto int_builder_2 = std::make_shared(); - StructBuilder builder(simple_struct, default_memory_pool(), - {int_builder_1, int_builder_2}); - ASSERT_OK(builder.Append()); - ASSERT_OK(int_builder_1->Append(11)); - ASSERT_OK(int_builder_2->Append(22)); - - std::shared_ptr array; - ASSERT_OK(builder.Finish(&array)); + auto array = ArrayFromJSON(simple_struct, "[[11, 22]]"); static const char* ex = R"expected(-- is_valid: all not null -- child 0 type: int32 @@ -202,22 +191,7 @@ TEST_F(TestPrettyPrint, StructTypeAdvanced) { auto simple_2 = field("two", int32()); auto simple_struct = struct_({simple_1, simple_2}); - auto int_builder_1 = std::make_shared(); - auto int_builder_2 = std::make_shared(); - StructBuilder builder(simple_struct, default_memory_pool(), - {int_builder_1, int_builder_2}); - ASSERT_OK(builder.Append()); - ASSERT_OK(int_builder_1->Append(11)); - ASSERT_OK(int_builder_2->Append(22)); - ASSERT_OK(builder.AppendNull()); - ASSERT_OK(int_builder_1->AppendNull()); - ASSERT_OK(int_builder_2->AppendNull()); - ASSERT_OK(builder.Append()); - ASSERT_OK(int_builder_1->AppendNull()); - ASSERT_OK(int_builder_2->Append(33)); - - std::shared_ptr array; - ASSERT_OK(builder.Finish(&array)); + auto array = ArrayFromJSON(simple_struct, "[[11, 22], null, [null, 33]]"); static const char* ex = R"expected(-- is_valid: [ @@ -251,24 +225,9 @@ TEST_F(TestPrettyPrint, BinaryType) { } TEST_F(TestPrettyPrint, ListType) { - Int64Builder* int_builder = new Int64Builder(); - ListBuilder list_builder(default_memory_pool(), - std::unique_ptr(int_builder)); - - ASSERT_OK(list_builder.Append()); - ASSERT_OK(int_builder->AppendNull()); - ASSERT_OK(list_builder.Append()); - ASSERT_OK(list_builder.Append(false)); - ASSERT_OK(list_builder.Append()); - ASSERT_OK(int_builder->Append(4)); - ASSERT_OK(int_builder->Append(6)); - ASSERT_OK(int_builder->Append(7)); - ASSERT_OK(list_builder.Append()); - ASSERT_OK(int_builder->Append(2)); - ASSERT_OK(int_builder->Append(3)); + auto list_type = list(int64()); + auto array = ArrayFromJSON(list_type, "[[null], [], null, [4, 6, 7], [2, 3]]"); - std::shared_ptr array; - ASSERT_OK(list_builder.Finish(&array)); static const char* ex = R"expected([ [ null @@ -318,18 +277,11 @@ TEST_F(TestPrettyPrint, ListType) { TEST_F(TestPrettyPrint, FixedSizeBinaryType) { std::vector is_valid = {true, true, false, true, false}; - std::vector values = {"foo", "bar", "baz"}; - std::shared_ptr array; auto type = fixed_size_binary(3); - FixedSizeBinaryBuilder builder(type); + auto array = ArrayFromJSON(type, "[\"foo\", \"bar\", null, \"baz\"]"); - ASSERT_OK(builder.Append(values[0])); - ASSERT_OK(builder.Append(values[1])); - ASSERT_OK(builder.Append(values[2])); - ASSERT_OK(builder.Finish(&array)); - - static const char* ex = "[\n 666F6F,\n 626172,\n 
62617A\n]"; + static const char* ex = "[\n 666F6F,\n 626172,\n null,\n 62617A\n]"; CheckArray(*array, {0, 10}, ex); static const char* ex_2 = " [\n 666F6F,\n ...\n 62617A\n ]"; CheckArray(*array, {2, 1}, ex_2); @@ -340,19 +292,7 @@ TEST_F(TestPrettyPrint, Decimal128Type) { int32_t s = 4; auto type = decimal(p, s); - - Decimal128Builder builder(type); - Decimal128 val; - - ASSERT_OK(Decimal128::FromString("123.4567", &val)); - ASSERT_OK(builder.Append(val)); - - ASSERT_OK(Decimal128::FromString("456.7891", &val)); - ASSERT_OK(builder.Append(val)); - ASSERT_OK(builder.AppendNull()); - - std::shared_ptr array; - ASSERT_OK(builder.Finish(&array)); + auto array = ArrayFromJSON(type, "[\"123.4567\", \"456.7891\", null]"); static const char* ex = "[\n 123.4567,\n 456.7891,\n null\n]"; CheckArray(*array, {0}, ex); @@ -392,11 +332,8 @@ TEST_F(TestPrettyPrint, DictionaryType) { } TEST_F(TestPrettyPrint, ChunkedArrayPrimitiveType) { - std::vector is_valid = {true, true, false, true, false}; - std::vector values = {0, 1, 2, 3, 4}; - std::shared_ptr array; - ArrayFromVector(is_valid, values, &array); - ChunkedArray chunked_array({array}); + auto array = ArrayFromJSON(int32(), "[0, 1, null, 3, null]"); + ChunkedArray chunked_array(array); static const char* expected = R"expected([ [ @@ -432,11 +369,8 @@ TEST_F(TestPrettyPrint, ChunkedArrayPrimitiveType) { } TEST_F(TestPrettyPrint, ColumnPrimitiveType) { - std::vector is_valid = {true, true, false, true, false}; - std::vector values = {0, 1, 2, 3, 4}; - std::shared_ptr array; - ArrayFromVector(is_valid, values, &array); std::shared_ptr int_field = field("column", int32()); + auto array = ArrayFromJSON(int_field->type(), "[0, 1, null, 3, null]"); Column column(int_field, ArrayVector({array})); static const char* expected = R"expected(column: int32 @@ -475,11 +409,8 @@ TEST_F(TestPrettyPrint, ColumnPrimitiveType) { } TEST_F(TestPrettyPrint, TablePrimitive) { - std::vector is_valid = {true, true, false, true, false}; - std::vector values = {0, 1, 2, 3, 4}; - std::shared_ptr array; - ArrayFromVector(is_valid, values, &array); std::shared_ptr int_field = field("column", int32()); + auto array = ArrayFromJSON(int_field->type(), "[0, 1, null, 3, null]"); std::shared_ptr column = std::make_shared(int_field, ArrayVector({array})); std::shared_ptr table_schema = schema({int_field}); diff --git a/cpp/src/arrow/pretty_print.cc b/cpp/src/arrow/pretty_print.cc index ec23bfb00fcde..c524039c3e86a 100644 --- a/cpp/src/arrow/pretty_print.cc +++ b/cpp/src/arrow/pretty_print.cc @@ -19,7 +19,7 @@ #include #include #include -#include +#include // IWYU pragma: keep #include #include #include diff --git a/cpp/src/arrow/pretty_print.h b/cpp/src/arrow/pretty_print.h index fde6c293f9b68..ca50bc0bc993c 100644 --- a/cpp/src/arrow/pretty_print.h +++ b/cpp/src/arrow/pretty_print.h @@ -21,14 +21,17 @@ #include #include -#include "arrow/type_fwd.h" #include "arrow/util/visibility.h" namespace arrow { class Array; +class Column; class ChunkedArray; +class RecordBatch; +class Schema; class Status; +class Table; struct PrettyPrintOptions { PrettyPrintOptions(int indent_arg, int window_arg = 10, int indent_size_arg = 2, diff --git a/cpp/src/arrow/python/CMakeLists.txt b/cpp/src/arrow/python/CMakeLists.txt index ff63eb05675df..7f1a0b5086e0b 100644 --- a/cpp/src/arrow/python/CMakeLists.txt +++ b/cpp/src/arrow/python/CMakeLists.txt @@ -22,7 +22,10 @@ find_package(PythonLibsNew REQUIRED) find_package(NumPy REQUIRED) +add_custom_target(arrow_python-all) add_custom_target(arrow_python) 
+add_custom_target(arrow_python-tests) +add_dependencies(arrow_python-all arrow_python arrow_python-tests) set(ARROW_PYTHON_SRCS arrow_to_pandas.cc @@ -74,9 +77,11 @@ ADD_ARROW_LIB(arrow_python EXTRA_INCLUDES "${ARROW_PYTHON_INCLUDES}" ) +add_dependencies(arrow_python ${ARROW_PYTHON_LIBRARIES}) + foreach(LIB_TARGET ${ARROW_PYTHON_LIBRARIES}) target_compile_definitions(${LIB_TARGET} - PRIVATE ARROW_EXPORTING) + PRIVATE ARROW_PYTHON_EXPORTING) endforeach() if (ARROW_BUILD_STATIC AND MSVC) @@ -91,36 +96,10 @@ if ("${COMPILER_FAMILY}" STREQUAL "clang") COMPILE_FLAGS -Wno-parentheses-equality) endif() -install(FILES - api.h - arrow_to_pandas.h - benchmark.h - common.h - config.h - decimal.h - deserialize.h - helpers.h - inference.h - init.h - io.h - iterators.h - numpy_convert.h - numpy_interop.h - numpy_to_arrow.h - python_to_arrow.h - platform.h - pyarrow.h - serialize.h - type_traits.h - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow/python") +ARROW_INSTALL_ALL_HEADERS("arrow/python") # pkg-config support -configure_file(arrow-python.pc.in - "${CMAKE_CURRENT_BINARY_DIR}/arrow-python.pc" - @ONLY) -install( - FILES "${CMAKE_CURRENT_BINARY_DIR}/arrow-python.pc" - DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig/") +ARROW_ADD_PKG_CONFIG("arrow-python") # ---------------------------------------------------------------------- @@ -129,7 +108,7 @@ if (ARROW_BUILD_TESTS) util/test_main.cc) target_link_libraries(arrow_python_test_main - gtest_static) + ${GTEST_LIBRARY}) target_include_directories(arrow_python_test_main SYSTEM PUBLIC ${ARROW_PYTHON_INCLUDES}) @@ -156,6 +135,6 @@ if (ARROW_BUILD_TESTS) STATIC_LINK_LIBS "${ARROW_PYTHON_TEST_LINK_LIBS}" EXTRA_LINK_LIBS ${PYTHON_LIBRARIES} EXTRA_INCLUDES "${ARROW_PYTHON_INCLUDES}" - LABELS "arrow_python" + LABELS "arrow_python-tests" NO_VALGRIND) endif() diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc b/cpp/src/arrow/python/arrow_to_pandas.cc index 3e04f2727ed51..8aa0bf74b7b27 100644 --- a/cpp/src/arrow/python/arrow_to_pandas.cc +++ b/cpp/src/arrow/python/arrow_to_pandas.cc @@ -36,9 +36,11 @@ #include "arrow/type.h" #include "arrow/type_traits.h" #include "arrow/util/checked_cast.h" +#include "arrow/util/hashing.h" #include "arrow/util/logging.h" #include "arrow/util/macros.h" #include "arrow/util/parallel.h" +#include "arrow/util/string_view.h" #include "arrow/visitor_inline.h" #include "arrow/compute/api.h" @@ -75,21 +77,21 @@ template struct WrapBytes {}; template <> -struct WrapBytes { +struct WrapBytes { static inline PyObject* Wrap(const char* data, int64_t length) { return PyUnicode_FromStringAndSize(data, length); } }; template <> -struct WrapBytes { +struct WrapBytes { static inline PyObject* Wrap(const char* data, int64_t length) { return PyBytes_FromStringAndSize(data, length); } }; template <> -struct WrapBytes { +struct WrapBytes { static inline PyObject* Wrap(const char* data, int64_t length) { return PyBytes_FromStringAndSize(data, length); } @@ -216,7 +218,7 @@ class PandasBlock { CATEGORICAL }; - PandasBlock(PandasOptions options, int64_t num_rows, int num_columns) + PandasBlock(const PandasOptions& options, int64_t num_rows, int num_columns) : num_rows_(num_rows), num_columns_(num_columns), options_(options) {} virtual ~PandasBlock() {} @@ -301,8 +303,8 @@ inline const T* GetPrimitiveValues(const Array& arr) { } template -inline void ConvertIntegerWithNulls(PandasOptions options, const ChunkedArray& data, - double* out_values) { +inline void ConvertIntegerWithNulls(const PandasOptions& options, + const ChunkedArray& data, double* 
out_values) { for (int c = 0; c < data.num_chunks(); c++) { const auto& arr = *data.chunk(c); const T* in_values = GetPrimitiveValues(arr); @@ -315,8 +317,8 @@ inline void ConvertIntegerWithNulls(PandasOptions options, const ChunkedArray& d } template -inline void ConvertIntegerNoNullsSameType(PandasOptions options, const ChunkedArray& data, - T* out_values) { +inline void ConvertIntegerNoNullsSameType(const PandasOptions& options, + const ChunkedArray& data, T* out_values) { for (int c = 0; c < data.num_chunks(); c++) { const auto& arr = *data.chunk(c); if (arr.length() > 0) { @@ -328,8 +330,8 @@ inline void ConvertIntegerNoNullsSameType(PandasOptions options, const ChunkedAr } template -inline void ConvertIntegerNoNullsCast(PandasOptions options, const ChunkedArray& data, - OutType* out_values) { +inline void ConvertIntegerNoNullsCast(const PandasOptions& options, + const ChunkedArray& data, OutType* out_values) { for (int c = 0; c < data.num_chunks(); c++) { const auto& arr = *data.chunk(c); const InType* in_values = GetPrimitiveValues(arr); @@ -339,8 +341,8 @@ inline void ConvertIntegerNoNullsCast(PandasOptions options, const ChunkedArray& } } -static Status ConvertBooleanWithNulls(PandasOptions options, const ChunkedArray& data, - PyObject** out_values) { +static Status ConvertBooleanWithNulls(const PandasOptions& options, + const ChunkedArray& data, PyObject** out_values) { PyAcquireGIL lock; for (int c = 0; c < data.num_chunks(); c++) { const auto& arr = checked_cast(*data.chunk(c)); @@ -363,7 +365,7 @@ static Status ConvertBooleanWithNulls(PandasOptions options, const ChunkedArray& return Status::OK(); } -static void ConvertBooleanNoNulls(PandasOptions options, const ChunkedArray& data, +static void ConvertBooleanNoNulls(const PandasOptions& options, const ChunkedArray& data, uint8_t* out_values) { for (int c = 0; c < data.num_chunks(); c++) { const auto& arr = checked_cast(*data.chunk(c)); @@ -373,59 +375,106 @@ static void ConvertBooleanNoNulls(PandasOptions options, const ChunkedArray& dat } } -template -static Status ConvertIntegerObjects(PandasOptions options, const ChunkedArray& data, - PyObject** out_values) { - PyAcquireGIL lock; - constexpr bool is_signed = std::is_signed::value; - for (int c = 0; c < data.num_chunks(); c++) { - const auto& arr = *data.chunk(c); - const auto* in_values = GetPrimitiveValues(arr); - - for (int i = 0; i < arr.length(); ++i) { - if (arr.IsNull(i)) { - Py_INCREF(Py_None); - *out_values++ = Py_None; - } else { - *out_values++ = is_signed ? 
PyLong_FromLongLong(in_values[i]) - : PyLong_FromUnsignedLongLong(in_values[i]); - RETURN_IF_PYERROR(); - } +// Generic Array -> PyObject** converter that handles object deduplication, if +// requested +template +inline Status WriteArrayObjects(const ArrayType& arr, WriteValue&& write_func, + PyObject** out_values) { + const bool has_nulls = arr.null_count() > 0; + for (int64_t i = 0; i < arr.length(); ++i) { + if (has_nulls && arr.IsNull(i)) { + Py_INCREF(Py_None); + *out_values = Py_None; + } else { + RETURN_NOT_OK(write_func(arr.GetView(i), out_values)); } + ++out_values; } return Status::OK(); } -template -inline Status ConvertBinaryLike(PandasOptions options, const ChunkedArray& data, - PyObject** out_values) { +template +struct MemoizationTraits { + using Scalar = typename T::c_type; +}; + +template +struct MemoizationTraits> { + // For binary, we memoize string_view as a scalar value to avoid having to + // unnecessarily copy the memory into the memo table data structure + using Scalar = util::string_view; +}; + +template +inline Status ConvertAsPyObjects(const PandasOptions& options, const ChunkedArray& data, + WrapFunction&& wrap_func, PyObject** out_values) { using ArrayType = typename TypeTraits::ArrayType; + using Scalar = typename MemoizationTraits::Scalar; + PyAcquireGIL lock; + ::arrow::internal::ScalarMemoTable memo_table; + std::vector unique_values; + int32_t memo_size = 0; + + auto WrapMemoized = [&](const Scalar& value, PyObject** out_values) { + int32_t memo_index = memo_table.GetOrInsert(value); + if (memo_index == memo_size) { + // New entry + RETURN_NOT_OK(wrap_func(value, out_values)); + unique_values.push_back(*out_values); + ++memo_size; + } else { + // Duplicate entry + Py_INCREF(unique_values[memo_index]); + *out_values = unique_values[memo_index]; + } + return Status::OK(); + }; + + auto WrapUnmemoized = [&](const Scalar& value, PyObject** out_values) { + return wrap_func(value, out_values); + }; + for (int c = 0; c < data.num_chunks(); c++) { const auto& arr = checked_cast(*data.chunk(c)); - - const bool has_nulls = data.null_count() > 0; - for (int64_t i = 0; i < arr.length(); ++i) { - if (has_nulls && arr.IsNull(i)) { - Py_INCREF(Py_None); - *out_values = Py_None; - } else { - auto view = arr.GetView(i); - *out_values = WrapBytes::Wrap(view.data(), view.length()); - if (*out_values == nullptr) { - PyErr_Clear(); - std::stringstream ss; - ss << "Wrapping " << view << " failed"; - return Status::UnknownError(ss.str()); - } - } - ++out_values; + if (options.deduplicate_objects) { + RETURN_NOT_OK(WriteArrayObjects(arr, WrapMemoized, out_values)); + } else { + RETURN_NOT_OK(WriteArrayObjects(arr, WrapUnmemoized, out_values)); } + out_values += arr.length(); } return Status::OK(); } -inline Status ConvertNulls(PandasOptions options, const ChunkedArray& data, +template +static Status ConvertIntegerObjects(const PandasOptions& options, + const ChunkedArray& data, PyObject** out_values) { + using T = typename Type::c_type; + auto WrapValue = [](T value, PyObject** out) { + *out = std::is_signed::value ? 
PyLong_FromLongLong(value) + : PyLong_FromUnsignedLongLong(value); + RETURN_IF_PYERROR(); + return Status::OK(); + }; + return ConvertAsPyObjects(options, data, WrapValue, out_values); +} + +template +inline Status ConvertBinaryLike(const PandasOptions& options, const ChunkedArray& data, + PyObject** out_values) { + auto WrapValue = [](const util::string_view& view, PyObject** out) { + *out = WrapBytes::Wrap(view.data(), view.length()); + if (*out == nullptr) { + PyErr_Clear(); + return Status::UnknownError("Wrapping ", view, " failed"); + } + return Status::OK(); + }; + return ConvertAsPyObjects(options, data, WrapValue, out_values); +} + +inline Status ConvertNulls(const PandasOptions& options, const ChunkedArray& data, PyObject** out_values) { PyAcquireGIL lock; for (int c = 0; c < data.num_chunks(); c++) { @@ -441,7 +490,7 @@ inline Status ConvertNulls(PandasOptions options, const ChunkedArray& data, return Status::OK(); } -inline Status ConvertStruct(PandasOptions options, const ChunkedArray& data, +inline Status ConvertStruct(const PandasOptions& options, const ChunkedArray& data, PyObject** out_values) { PyAcquireGIL lock; if (data.num_chunks() <= 0) { @@ -505,7 +554,8 @@ inline Status ConvertStruct(PandasOptions options, const ChunkedArray& data, } template -inline Status ConvertListsLike(PandasOptions options, const std::shared_ptr& col, +inline Status ConvertListsLike(const PandasOptions& options, + const std::shared_ptr& col, PyObject** out_values) { const ChunkedArray& data = *col->data().get(); const auto& list_type = checked_cast(*col->type()); @@ -606,69 +656,40 @@ inline void ConvertDatetimeNanos(const ChunkedArray& data, int64_t* out_values) } } -template -static Status ConvertDates(PandasOptions options, const ChunkedArray& data, +template +static Status ConvertDates(const PandasOptions& options, const ChunkedArray& data, PyObject** out_values) { - using ArrayType = typename TypeTraits::ArrayType; - - PyAcquireGIL lock; - OwnedRef date_ref; - - PyDateTime_IMPORT; - - for (int c = 0; c < data.num_chunks(); c++) { - const auto& arr = checked_cast(*data.chunk(c)); - auto type = std::dynamic_pointer_cast(arr.type()); - DCHECK(type); - - const DateUnit unit = type->unit(); - - for (int64_t i = 0; i < arr.length(); ++i) { - if (arr.IsNull(i)) { - Py_INCREF(Py_None); - *out_values++ = Py_None; - } else { - RETURN_NOT_OK(PyDate_from_int(arr.Value(i), unit, out_values++)); - RETURN_IF_PYERROR(); - } - } + { + PyAcquireGIL lock; + PyDateTime_IMPORT; } - - return Status::OK(); + auto WrapValue = [](typename Type::c_type value, PyObject** out) { + RETURN_NOT_OK(PyDate_from_int(value, Type::UNIT, out)); + RETURN_IF_PYERROR(); + return Status::OK(); + }; + return ConvertAsPyObjects(options, data, WrapValue, out_values); } -template -static Status ConvertTimes(PandasOptions options, const ChunkedArray& data, +template +static Status ConvertTimes(const PandasOptions& options, const ChunkedArray& data, PyObject** out_values) { - using ArrayType = typename TypeTraits::ArrayType; - - PyAcquireGIL lock; - OwnedRef time_ref; - - PyDateTime_IMPORT; - - for (int c = 0; c < data.num_chunks(); c++) { - const auto& arr = checked_cast(*data.chunk(c)); - auto type = std::dynamic_pointer_cast(arr.type()); - DCHECK(type); - - const TimeUnit::type unit = type->unit(); - - for (int64_t i = 0; i < arr.length(); ++i) { - if (arr.IsNull(i)) { - Py_INCREF(Py_None); - *out_values++ = Py_None; - } else { - RETURN_NOT_OK(PyTime_from_int(arr.Value(i), unit, out_values++)); - RETURN_IF_PYERROR(); - } - } + { + 
PyAcquireGIL lock; + PyDateTime_IMPORT; } - return Status::OK(); + const TimeUnit::type unit = checked_cast(*data.type()).unit(); + + auto WrapValue = [unit](typename Type::c_type value, PyObject** out) { + RETURN_NOT_OK(PyTime_from_int(value, unit, out)); + RETURN_IF_PYERROR(); + return Status::OK(); + }; + return ConvertAsPyObjects(options, data, WrapValue, out_values); } -static Status ConvertDecimals(PandasOptions options, const ChunkedArray& data, +static Status ConvertDecimals(const PandasOptions& options, const ChunkedArray& data, PyObject** out_values) { PyAcquireGIL lock; OwnedRef decimal; @@ -717,21 +738,21 @@ class ObjectBlock : public PandasBlock { if (type == Type::BOOL) { RETURN_NOT_OK(ConvertBooleanWithNulls(options_, data, out_buffer)); } else if (type == Type::UINT8) { - RETURN_NOT_OK(ConvertIntegerObjects(options_, data, out_buffer)); + RETURN_NOT_OK(ConvertIntegerObjects(options_, data, out_buffer)); } else if (type == Type::INT8) { - RETURN_NOT_OK(ConvertIntegerObjects(options_, data, out_buffer)); + RETURN_NOT_OK(ConvertIntegerObjects(options_, data, out_buffer)); } else if (type == Type::UINT16) { - RETURN_NOT_OK(ConvertIntegerObjects(options_, data, out_buffer)); + RETURN_NOT_OK(ConvertIntegerObjects(options_, data, out_buffer)); } else if (type == Type::INT16) { - RETURN_NOT_OK(ConvertIntegerObjects(options_, data, out_buffer)); + RETURN_NOT_OK(ConvertIntegerObjects(options_, data, out_buffer)); } else if (type == Type::UINT32) { - RETURN_NOT_OK(ConvertIntegerObjects(options_, data, out_buffer)); + RETURN_NOT_OK(ConvertIntegerObjects(options_, data, out_buffer)); } else if (type == Type::INT32) { - RETURN_NOT_OK(ConvertIntegerObjects(options_, data, out_buffer)); + RETURN_NOT_OK(ConvertIntegerObjects(options_, data, out_buffer)); } else if (type == Type::UINT64) { - RETURN_NOT_OK(ConvertIntegerObjects(options_, data, out_buffer)); + RETURN_NOT_OK(ConvertIntegerObjects(options_, data, out_buffer)); } else if (type == Type::INT64) { - RETURN_NOT_OK(ConvertIntegerObjects(options_, data, out_buffer)); + RETURN_NOT_OK(ConvertIntegerObjects(options_, data, out_buffer)); } else if (type == Type::BINARY) { RETURN_NOT_OK(ConvertBinaryLike(options_, data, out_buffer)); } else if (type == Type::STRING) { @@ -773,18 +794,16 @@ class ObjectBlock : public PandasBlock { CONVERTLISTSLIKE_CASE(ListType, LIST) CONVERTLISTSLIKE_CASE(NullType, NA) default: { - std::stringstream ss; - ss << "Not implemented type for conversion from List to Pandas ObjectBlock: " - << list_type->value_type()->ToString(); - return Status::NotImplemented(ss.str()); + return Status::NotImplemented( + "Not implemented type for conversion from List to Pandas ObjectBlock: ", + list_type->value_type()->ToString()); } } } else if (type == Type::STRUCT) { RETURN_NOT_OK(ConvertStruct(options_, data, out_buffer)); } else { - std::stringstream ss; - ss << "Unsupported type for object array output: " << col->type()->ToString(); - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("Unsupported type for object array output: ", + col->type()->ToString()); } placement_data_[rel_placement] = abs_placement; @@ -810,10 +829,9 @@ class IntBlock : public PandasBlock { const ChunkedArray& data = *col->data().get(); if (type != ARROW_TYPE) { - std::stringstream ss; - ss << "Cannot write Arrow data of type " << col->type()->ToString(); - ss << " to a Pandas int" << sizeof(C_TYPE) << " block."; - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("Cannot write Arrow data of type ", + 
col->type()->ToString(), " to a Pandas int", + sizeof(C_TYPE), " block"); } ConvertIntegerNoNullsSameType(options_, data, out_buffer); @@ -841,10 +859,9 @@ class Float16Block : public PandasBlock { Type::type type = col->type()->id(); if (type != Type::HALF_FLOAT) { - std::stringstream ss; - ss << "Cannot write Arrow data of type " << col->type()->ToString(); - ss << " to a Pandas float16 block."; - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("Cannot write Arrow data of type ", + col->type()->ToString(), + " to a Pandas float16 block"); } npy_half* out_buffer = @@ -866,10 +883,9 @@ class Float32Block : public PandasBlock { Type::type type = col->type()->id(); if (type != Type::FLOAT) { - std::stringstream ss; - ss << "Cannot write Arrow data of type " << col->type()->ToString(); - ss << " to a Pandas float32 block."; - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("Cannot write Arrow data of type ", + col->type()->ToString(), + " to a Pandas float32 block"); } float* out_buffer = reinterpret_cast(block_data_) + rel_placement * num_rows_; @@ -922,10 +938,9 @@ class Float64Block : public PandasBlock { ConvertNumericNullable(data, NAN, out_buffer); break; default: - std::stringstream ss; - ss << "Cannot write Arrow data of type " << col->type()->ToString(); - ss << " to a Pandas float64 block."; - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("Cannot write Arrow data of type ", + col->type()->ToString(), + " to a Pandas float64 block"); } #undef INTEGER_CASE @@ -945,10 +960,9 @@ class BoolBlock : public PandasBlock { Type::type type = col->type()->id(); if (type != Type::BOOL) { - std::stringstream ss; - ss << "Cannot write Arrow data of type " << col->type()->ToString(); - ss << " to a Pandas boolean block."; - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("Cannot write Arrow data of type ", + col->type()->ToString(), + " to a Pandas boolean block"); } uint8_t* out_buffer = @@ -1006,10 +1020,9 @@ class DatetimeBlock : public PandasBlock { return Status::NotImplemented("Unsupported time unit"); } } else { - std::stringstream ss; - ss << "Cannot write Arrow data of type " << col->type()->ToString(); - ss << " to a Pandas datetime block."; - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("Cannot write Arrow data of type ", + col->type()->ToString(), + " to a Pandas datetime block."); } placement_data_[rel_placement] = abs_placement; @@ -1019,7 +1032,8 @@ class DatetimeBlock : public PandasBlock { class DatetimeTZBlock : public DatetimeBlock { public: - DatetimeTZBlock(PandasOptions options, const std::string& timezone, int64_t num_rows) + DatetimeTZBlock(const PandasOptions& options, const std::string& timezone, + int64_t num_rows) : DatetimeBlock(options, num_rows, 1), timezone_(timezone) {} // Like Categorical, the internal ndarray is 1-dimensional @@ -1048,8 +1062,12 @@ class DatetimeTZBlock : public DatetimeBlock { class CategoricalBlock : public PandasBlock { public: - explicit CategoricalBlock(PandasOptions options, MemoryPool* pool, int64_t num_rows) - : PandasBlock(options, num_rows, 1), pool_(pool), needs_copy_(false) {} + explicit CategoricalBlock(const PandasOptions& options, MemoryPool* pool, + int64_t num_rows) + : PandasBlock(options, num_rows, 1), + pool_(pool), + ordered_(false), + needs_copy_(false) {} Status Allocate() override { return Status::NotImplemented( @@ -1075,9 +1093,8 @@ class CategoricalBlock : public PandasBlock { const T* values 
= arr.raw_values(); for (int64_t i = 0; i < arr.length(); ++i) { if (arr.IsValid(i) && (values[i] < 0 || values[i] >= dict_length)) { - std::stringstream ss; - ss << "Out of bounds dictionary index: " << static_cast(values[i]); - return Status::Invalid(ss.str()); + return Status::Invalid("Out of bounds dictionary index: ", + static_cast(values[i])); } } return Status::OK(); @@ -1088,16 +1105,15 @@ class CategoricalBlock : public PandasBlock { RETURN_NOT_OK(AllocateNDArrayFromIndices(npy_type, indices_first)); } else { if (options_.zero_copy_only) { - std::stringstream ss; if (needs_copy_) { - ss << "Need to allocate categorical memory, " - << "but only zero-copy conversions allowed."; - } else { - ss << "Needed to copy " << data.num_chunks() << " chunks with " - << indices_first->null_count() - << " indices nulls, but zero_copy_only was True"; + return Status::Invalid("Need to allocate categorical memory, but ", + "only zero-copy conversions " + "allowed"); } - return Status::Invalid(ss.str()); + + return Status::Invalid("Needed to copy ", data.num_chunks(), " chunks with ", + indices_first->null_count(), + " indices nulls, but zero_copy_only was True"); } RETURN_NOT_OK(AllocateNDArray(npy_type, 1)); @@ -1155,10 +1171,8 @@ class CategoricalBlock : public PandasBlock { RETURN_NOT_OK(WriteIndices(converted_col)); break; default: { - std::stringstream ss; - ss << "Categorical index type not supported: " - << dict_type.index_type()->ToString(); - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("Categorical index type not supported: ", + dict_type.index_type()->ToString()); } } @@ -1249,7 +1263,7 @@ class CategoricalBlock : public PandasBlock { bool needs_copy_; }; -Status MakeBlock(PandasOptions options, PandasBlock::type type, int64_t num_rows, +Status MakeBlock(const PandasOptions& options, PandasBlock::type type, int64_t num_rows, int num_columns, std::shared_ptr* block) { #define BLOCK_CASE(NAME, TYPE) \ case PandasBlock::NAME: \ @@ -1349,10 +1363,8 @@ static Status GetPandasBlockType(const Column& col, const PandasOptions& options case Type::LIST: { auto list_type = std::static_pointer_cast(col.type()); if (!ListTypeSupported(*list_type->value_type())) { - std::stringstream ss; - ss << "Not implemented type for list in DataFrameBlock: " - << list_type->value_type()->ToString(); - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("Not implemented type for list in DataFrameBlock: ", + list_type->value_type()->ToString()); } *output_type = PandasBlock::OBJECT; } break; @@ -1360,10 +1372,9 @@ static Status GetPandasBlockType(const Column& col, const PandasOptions& options *output_type = PandasBlock::CATEGORICAL; break; default: - std::stringstream ss; - ss << "No known equivalent Pandas block for Arrow data of type "; - ss << col.type()->ToString() << " is known."; - return Status::NotImplemented(ss.str()); + return Status::NotImplemented( + "No known equivalent Pandas block for Arrow data of type ", + col.type()->ToString(), " is known."); } return Status::OK(); } @@ -1535,7 +1546,7 @@ class DataFrameBlockCreator { class ArrowDeserializer { public: - ArrowDeserializer(PandasOptions options, const std::shared_ptr& col, + ArrowDeserializer(const PandasOptions& options, const std::shared_ptr& col, PyObject* py_ref) : col_(col), data_(*col->data().get()), options_(options), py_ref_(py_ref) {} @@ -1549,7 +1560,7 @@ class ArrowDeserializer { } template - Status ConvertValuesZeroCopy(PandasOptions options, int npy_type, + Status 
ConvertValuesZeroCopy(const PandasOptions& options, int npy_type, const std::shared_ptr& arr) { typedef typename internal::arrow_traits::T T; @@ -1657,10 +1668,8 @@ class ArrowDeserializer { if (data_.num_chunks() == 1 && data_.null_count() == 0) { return ConvertValuesZeroCopy(options_, npy_type, data_.chunk(0)); } else if (options_.zero_copy_only) { - std::stringstream ss; - ss << "Needed to copy " << data_.num_chunks() << " chunks with " - << data_.null_count() << " nulls, but zero_copy_only was True"; - return Status::Invalid(ss.str()); + return Status::Invalid("Needed to copy ", data_.num_chunks(), " chunks with ", + data_.null_count(), " nulls, but zero_copy_only was True"); } RETURN_NOT_OK(AllocateOutput(npy_type)); @@ -1751,17 +1760,13 @@ class ArrowDeserializer { if (data_.num_chunks() == 1 && data_.null_count() == 0) { return ConvertValuesZeroCopy(options_, traits::npy_type, data_.chunk(0)); } else if (options_.zero_copy_only) { - std::stringstream ss; - ss << "Needed to copy " << data_.num_chunks() << " chunks with " - << data_.null_count() << " nulls, but zero_copy_only was True"; - return Status::Invalid(ss.str()); + return Status::Invalid("Needed to copy ", data_.num_chunks(), " chunks with ", + data_.null_count(), " nulls, but zero_copy_only was True"); } if (data_.null_count() > 0) { if (options_.integer_object_nulls) { - using c_type = typename Type::c_type; - - return VisitObjects(ConvertIntegerObjects); + return VisitObjects(ConvertIntegerObjects); } else { RETURN_NOT_OK(AllocateOutput(NPY_FLOAT64)); auto out_values = reinterpret_cast(PyArray_DATA(arr_)); @@ -1854,9 +1859,8 @@ class ArrowDeserializer { CONVERTVALUES_LISTSLIKE_CASE(Decimal128Type, DECIMAL) CONVERTVALUES_LISTSLIKE_CASE(ListType, LIST) default: { - std::stringstream ss; - ss << "Not implemented type for lists: " << list_type->value_type()->ToString(); - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("Not implemented type for lists: ", + list_type->value_type()->ToString()); } } #undef CONVERTVALUES_LISTSLIKE_CASE @@ -1900,15 +1904,16 @@ class ArrowDeserializer { PyObject* result_; }; -Status ConvertArrayToPandas(PandasOptions options, const std::shared_ptr& arr, - PyObject* py_ref, PyObject** out) { +Status ConvertArrayToPandas(const PandasOptions& options, + const std::shared_ptr& arr, PyObject* py_ref, + PyObject** out) { static std::string dummy_name = "dummy"; auto field = std::make_shared(dummy_name, arr->type()); auto col = std::make_shared(field, arr); return ConvertColumnToPandas(options, col, py_ref, out); } -Status ConvertChunkedArrayToPandas(PandasOptions options, +Status ConvertChunkedArrayToPandas(const PandasOptions& options, const std::shared_ptr& ca, PyObject* py_ref, PyObject** out) { static std::string dummy_name = "dummy"; @@ -1917,19 +1922,21 @@ Status ConvertChunkedArrayToPandas(PandasOptions options, return ConvertColumnToPandas(options, col, py_ref, out); } -Status ConvertColumnToPandas(PandasOptions options, const std::shared_ptr& col, - PyObject* py_ref, PyObject** out) { +Status ConvertColumnToPandas(const PandasOptions& options, + const std::shared_ptr& col, PyObject* py_ref, + PyObject** out) { ArrowDeserializer converter(options, col, py_ref); return converter.Convert(out); } -Status ConvertTableToPandas(PandasOptions options, const std::shared_ptr
<Table>& table, - MemoryPool* pool, PyObject** out) { +Status ConvertTableToPandas(const PandasOptions& options, + const std::shared_ptr<Table>& table, MemoryPool* pool, + PyObject** out) { return ConvertTableToPandas(options, std::unordered_set<std::string>(), table, pool, out); } -Status ConvertTableToPandas(PandasOptions options, +Status ConvertTableToPandas(const PandasOptions& options, const std::unordered_set<std::string>& categorical_columns, const std::shared_ptr<Table>
& table, MemoryPool* pool, PyObject** out) { diff --git a/cpp/src/arrow/python/arrow_to_pandas.h b/cpp/src/arrow/python/arrow_to_pandas.h index 138b010515bed..20bad40971020 100644 --- a/cpp/src/arrow/python/arrow_to_pandas.h +++ b/cpp/src/arrow/python/arrow_to_pandas.h @@ -27,7 +27,7 @@ #include #include -#include "arrow/util/visibility.h" +#include "arrow/python/visibility.h" namespace arrow { @@ -43,32 +43,32 @@ namespace py { struct PandasOptions { /// If true, we will convert all string columns to categoricals - bool strings_to_categorical; - bool zero_copy_only; - bool integer_object_nulls; - bool date_as_object; - bool use_threads; - - PandasOptions() - : strings_to_categorical(false), - zero_copy_only(false), - integer_object_nulls(false), - date_as_object(false), - use_threads(false) {} + bool strings_to_categorical = false; + bool zero_copy_only = false; + bool integer_object_nulls = false; + bool date_as_object = false; + bool use_threads = false; + + /// \brief If true, do not create duplicate PyObject versions of equal + /// objects. This only applies to immutable objects like strings or datetime + /// objects + bool deduplicate_objects = false; }; -ARROW_EXPORT -Status ConvertArrayToPandas(PandasOptions options, const std::shared_ptr& arr, - PyObject* py_ref, PyObject** out); +ARROW_PYTHON_EXPORT +Status ConvertArrayToPandas(const PandasOptions& options, + const std::shared_ptr& arr, PyObject* py_ref, + PyObject** out); -ARROW_EXPORT -Status ConvertChunkedArrayToPandas(PandasOptions options, +ARROW_PYTHON_EXPORT +Status ConvertChunkedArrayToPandas(const PandasOptions& options, const std::shared_ptr& col, PyObject* py_ref, PyObject** out); -ARROW_EXPORT -Status ConvertColumnToPandas(PandasOptions options, const std::shared_ptr& col, - PyObject* py_ref, PyObject** out); +ARROW_PYTHON_EXPORT +Status ConvertColumnToPandas(const PandasOptions& options, + const std::shared_ptr& col, PyObject* py_ref, + PyObject** out); // Convert a whole table as efficiently as possible to a pandas.DataFrame. // @@ -76,16 +76,17 @@ Status ConvertColumnToPandas(PandasOptions options, const std::shared_ptr& table, - MemoryPool* pool, PyObject** out); +ARROW_PYTHON_EXPORT +Status ConvertTableToPandas(const PandasOptions& options, + const std::shared_ptr
<Table>& table, MemoryPool* pool, + PyObject** out); /// Convert a whole table as efficiently as possible to a pandas.DataFrame. /// /// Explicitly name columns that should be a categorical /// This option is only used on conversions that are applied to a table. -ARROW_EXPORT -Status ConvertTableToPandas(PandasOptions options, +ARROW_PYTHON_EXPORT +Status ConvertTableToPandas(const PandasOptions& options, const std::unordered_set<std::string>& categorical_columns, const std::shared_ptr<Table>
& table, MemoryPool* pool, PyObject** out); diff --git a/cpp/src/arrow/python/benchmark.h b/cpp/src/arrow/python/benchmark.h index f88b6b432bf79..caaff32b365dd 100644 --- a/cpp/src/arrow/python/benchmark.h +++ b/cpp/src/arrow/python/benchmark.h @@ -20,7 +20,7 @@ #include "arrow/python/platform.h" -#include "arrow/util/visibility.h" +#include "arrow/python/visibility.h" namespace arrow { namespace py { @@ -29,7 +29,7 @@ namespace benchmark { // Micro-benchmark routines for use from ASV // Run PandasObjectIsNull() once over every object in *list* -ARROW_EXPORT +ARROW_PYTHON_EXPORT void Benchmark_PandasObjectIsNull(PyObject* list); } // namespace benchmark diff --git a/cpp/src/arrow/python/common.h b/cpp/src/arrow/python/common.h index 5779ef09767fe..6e41beddd1b72 100644 --- a/cpp/src/arrow/python/common.h +++ b/cpp/src/arrow/python/common.h @@ -26,8 +26,8 @@ #include "arrow/python/config.h" #include "arrow/buffer.h" +#include "arrow/python/visibility.h" #include "arrow/util/macros.h" -#include "arrow/util/visibility.h" namespace arrow { @@ -35,7 +35,7 @@ class MemoryPool; namespace py { -ARROW_EXPORT Status ConvertPyError(StatusCode code = StatusCode::UnknownError); +ARROW_PYTHON_EXPORT Status ConvertPyError(StatusCode code = StatusCode::UnknownError); // Catch a pending Python exception and return the corresponding Status. // If no exception is pending, Status::OK() is returned. @@ -47,14 +47,14 @@ inline Status CheckPyError(StatusCode code = StatusCode::UnknownError) { } } -ARROW_EXPORT Status PassPyError(); +ARROW_PYTHON_EXPORT Status PassPyError(); // TODO(wesm): We can just let errors pass through. To be explored later #define RETURN_IF_PYERROR() ARROW_RETURN_NOT_OK(CheckPyError()); #define PY_RETURN_IF_ERROR(CODE) ARROW_RETURN_NOT_OK(CheckPyError(CODE)); -class ARROW_EXPORT PyAcquireGIL { +class ARROW_PYTHON_EXPORT PyAcquireGIL { public: PyAcquireGIL() : acquired_gil_(false) { acquire(); } @@ -85,7 +85,7 @@ class ARROW_EXPORT PyAcquireGIL { // A RAII primitive that DECREFs the underlying PyObject* when it // goes out of scope. -class ARROW_EXPORT OwnedRef { +class ARROW_PYTHON_EXPORT OwnedRef { public: OwnedRef() : obj_(NULLPTR) {} OwnedRef(OwnedRef&& other) : OwnedRef(other.detach()) {} @@ -126,7 +126,7 @@ class ARROW_EXPORT OwnedRef { // Same as OwnedRef, but ensures the GIL is taken when it goes out of scope. // This is for situations where the GIL is not always known to be held // (e.g. 
if it is released in the middle of a function for performance reasons) -class ARROW_EXPORT OwnedRefNoGIL : public OwnedRef { +class ARROW_PYTHON_EXPORT OwnedRefNoGIL : public OwnedRef { public: OwnedRefNoGIL() : OwnedRef() {} OwnedRefNoGIL(OwnedRefNoGIL&& other) : OwnedRef(other.detach()) {} @@ -215,10 +215,8 @@ struct PyBytesView { this->ref.reset(); return Status::OK(); } else { - std::stringstream ss; - ss << "Expected " << expected_msg << ", got a '" << Py_TYPE(obj)->tp_name - << "' object"; - return Status::TypeError(ss.str()); + return Status::TypeError("Expected ", expected_msg, ", got a '", + Py_TYPE(obj)->tp_name, "' object"); } } @@ -226,10 +224,10 @@ struct PyBytesView { }; // Return the common PyArrow memory pool -ARROW_EXPORT void set_default_memory_pool(MemoryPool* pool); -ARROW_EXPORT MemoryPool* get_memory_pool(); +ARROW_PYTHON_EXPORT void set_default_memory_pool(MemoryPool* pool); +ARROW_PYTHON_EXPORT MemoryPool* get_memory_pool(); -class ARROW_EXPORT PyBuffer : public Buffer { +class ARROW_PYTHON_EXPORT PyBuffer : public Buffer { public: /// While memoryview objects support multi-dimensional buffers, PyBuffer only supports /// one-dimensional byte buffers. diff --git a/cpp/src/arrow/python/config.h b/cpp/src/arrow/python/config.h index c2b089d382c00..5649ffe55c2ec 100644 --- a/cpp/src/arrow/python/config.h +++ b/cpp/src/arrow/python/config.h @@ -21,7 +21,7 @@ #include "arrow/python/platform.h" #include "arrow/python/numpy_interop.h" -#include "arrow/util/visibility.h" +#include "arrow/python/visibility.h" #if PY_MAJOR_VERSION >= 3 #define PyString_Check PyUnicode_Check @@ -30,10 +30,10 @@ namespace arrow { namespace py { -ARROW_EXPORT +ARROW_PYTHON_EXPORT extern PyObject* numpy_nan; -ARROW_EXPORT +ARROW_PYTHON_EXPORT void set_numpy_nan(PyObject* obj); } // namespace py diff --git a/cpp/src/arrow/python/decimal.cc b/cpp/src/arrow/python/decimal.cc index 051f31faacacf..8db7c01b9ab8b 100644 --- a/cpp/src/arrow/python/decimal.cc +++ b/cpp/src/arrow/python/decimal.cc @@ -125,11 +125,9 @@ Status DecimalFromPythonDecimal(PyObject* python_decimal, const DecimalType& arr const int32_t scale = arrow_type.scale(); if (ARROW_PREDICT_FALSE(inferred_precision > precision)) { - std::stringstream buf; - buf << "Decimal type with precision " << inferred_precision - << " does not fit into precision inferred from first array element: " - << precision; - return Status::Invalid(buf.str()); + return Status::Invalid( + "Decimal type with precision ", inferred_precision, + " does not fit into precision inferred from first array element: ", precision); } if (scale != inferred_scale) { diff --git a/cpp/src/arrow/python/decimal.h b/cpp/src/arrow/python/decimal.h index dd382d14e063e..80727954e0b65 100644 --- a/cpp/src/arrow/python/decimal.h +++ b/cpp/src/arrow/python/decimal.h @@ -20,8 +20,8 @@ #include +#include "arrow/python/visibility.h" #include "arrow/type.h" -#include "arrow/util/visibility.h" namespace arrow { @@ -38,21 +38,21 @@ class OwnedRef; namespace internal { // \brief Import the Python Decimal type -ARROW_EXPORT +ARROW_PYTHON_EXPORT Status ImportDecimalType(OwnedRef* decimal_type); // \brief Convert a Python Decimal object to a C++ string // \param[in] python_decimal A Python decimal.Decimal instance // \param[out] The string representation of the Python Decimal instance // \return The status of the operation -ARROW_EXPORT +ARROW_PYTHON_EXPORT Status PythonDecimalToString(PyObject* python_decimal, std::string* out); // \brief Convert a C++ std::string to a Python Decimal instance // 
\param[in] decimal_constructor The decimal type object // \param[in] decimal_string A decimal string // \return An instance of decimal.Decimal -ARROW_EXPORT +ARROW_PYTHON_EXPORT PyObject* DecimalFromString(PyObject* decimal_constructor, const std::string& decimal_string); @@ -61,21 +61,21 @@ PyObject* DecimalFromString(PyObject* decimal_constructor, // \param[in] arrow_type An instance of arrow::DecimalType // \param[out] out A pointer to a Decimal128 // \return The status of the operation -ARROW_EXPORT +ARROW_PYTHON_EXPORT Status DecimalFromPythonDecimal(PyObject* python_decimal, const DecimalType& arrow_type, Decimal128* out); // \brief Check whether obj is an instance of Decimal -ARROW_EXPORT +ARROW_PYTHON_EXPORT bool PyDecimal_Check(PyObject* obj); // \brief Check whether obj is nan. This function will abort the program if the argument // is not a Decimal instance -ARROW_EXPORT +ARROW_PYTHON_EXPORT bool PyDecimal_ISNAN(PyObject* obj); // \brief Helper class to track and update the precision and scale of a decimal -class ARROW_EXPORT DecimalMetadata { +class ARROW_PYTHON_EXPORT DecimalMetadata { public: DecimalMetadata(); DecimalMetadata(int32_t precision, int32_t scale); diff --git a/cpp/src/arrow/python/deserialize.cc b/cpp/src/arrow/python/deserialize.cc index f1a7eab8fcbda..f13070a5883f9 100644 --- a/cpp/src/arrow/python/deserialize.cc +++ b/cpp/src/arrow/python/deserialize.cc @@ -108,17 +108,16 @@ Status DeserializeArray(int32_t index, PyObject* base, const SerializedPyObject& return Status::OK(); } -Status GetValue(PyObject* context, const UnionArray& parent, const Array& arr, - int64_t index, int32_t type, PyObject* base, - const SerializedPyObject& blobs, PyObject** result) { - switch (arr.type()->id()) { - case Type::BOOL: +Status GetValue(PyObject* context, const Array& arr, int64_t index, int8_t type, + PyObject* base, const SerializedPyObject& blobs, PyObject** result) { + switch (type) { + case PythonType::BOOL: *result = PyBool_FromLong(checked_cast(arr).Value(index)); return Status::OK(); - case Type::INT64: { + case PythonType::PY2INT: + case PythonType::INT: { #if PY_MAJOR_VERSION < 3 - const std::string& child_name = parent.type()->child(type)->name(); - if (child_name == "py2_int") { + if (type == PythonType::PY2INT) { *result = PyInt_FromSsize_t(checked_cast(arr).Value(index)); return Status::OK(); } @@ -126,135 +125,151 @@ Status GetValue(PyObject* context, const UnionArray& parent, const Array& arr, *result = PyLong_FromSsize_t(checked_cast(arr).Value(index)); return Status::OK(); } - case Type::BINARY: { + case PythonType::BYTES: { auto view = checked_cast(arr).GetView(index); *result = PyBytes_FromStringAndSize(view.data(), view.length()); return CheckPyError(); } - case Type::STRING: { + case PythonType::STRING: { auto view = checked_cast(arr).GetView(index); *result = PyUnicode_FromStringAndSize(view.data(), view.length()); return CheckPyError(); } - case Type::HALF_FLOAT: { + case PythonType::HALF_FLOAT: { *result = PyHalf_FromHalf(checked_cast(arr).Value(index)); RETURN_IF_PYERROR(); return Status::OK(); } - case Type::FLOAT: + case PythonType::FLOAT: *result = PyFloat_FromDouble(checked_cast(arr).Value(index)); return Status::OK(); - case Type::DOUBLE: + case PythonType::DOUBLE: *result = PyFloat_FromDouble(checked_cast(arr).Value(index)); return Status::OK(); - case Type::DATE64: { + case PythonType::DATE64: { RETURN_NOT_OK(PyDateTime_from_int( checked_cast(arr).Value(index), TimeUnit::MICRO, result)); RETURN_IF_PYERROR(); return Status::OK(); } - case 
Type::STRUCT: { - const auto& s = checked_cast(arr); - const auto& l = checked_cast(*s.field(0)); - if (s.type()->child(0)->name() == "list") { - return DeserializeList(context, *l.values(), l.value_offset(index), - l.value_offset(index + 1), base, blobs, result); - } else if (s.type()->child(0)->name() == "tuple") { - return DeserializeTuple(context, *l.values(), l.value_offset(index), - l.value_offset(index + 1), base, blobs, result); - } else if (s.type()->child(0)->name() == "dict") { - return DeserializeDict(context, *l.values(), l.value_offset(index), - l.value_offset(index + 1), base, blobs, result); - } else if (s.type()->child(0)->name() == "set") { - return DeserializeSet(context, *l.values(), l.value_offset(index), + case PythonType::LIST: { + const auto& l = checked_cast(arr); + return DeserializeList(context, *l.values(), l.value_offset(index), + l.value_offset(index + 1), base, blobs, result); + } + case PythonType::DICT: { + const auto& l = checked_cast(arr); + return DeserializeDict(context, *l.values(), l.value_offset(index), + l.value_offset(index + 1), base, blobs, result); + } + case PythonType::TUPLE: { + const auto& l = checked_cast(arr); + return DeserializeTuple(context, *l.values(), l.value_offset(index), l.value_offset(index + 1), base, blobs, result); - } else { - DCHECK(false) << "unexpected StructArray type " << s.type()->child(0)->name(); - } } - default: { - const std::string& child_name = parent.type()->child(type)->name(); - if (child_name == "tensor") { - int32_t ref = checked_cast(arr).Value(index); - *result = wrap_tensor(blobs.tensors[ref]); - return Status::OK(); - } else if (child_name == "buffer") { - int32_t ref = checked_cast(arr).Value(index); - *result = wrap_buffer(blobs.buffers[ref]); - return Status::OK(); - } else if (child_name == "ndarray") { - int32_t ref = checked_cast(arr).Value(index); - return DeserializeArray(ref, base, blobs, result); - } else { - DCHECK(false) << "union tag " << type << " with child name '" << child_name - << "' not recognized"; - } + case PythonType::SET: { + const auto& l = checked_cast(arr); + return DeserializeSet(context, *l.values(), l.value_offset(index), + l.value_offset(index + 1), base, blobs, result); + } + case PythonType::TENSOR: { + int32_t ref = checked_cast(arr).Value(index); + *result = wrap_tensor(blobs.tensors[ref]); + return Status::OK(); + } + case PythonType::NDARRAY: { + int32_t ref = checked_cast(arr).Value(index); + return DeserializeArray(ref, base, blobs, result); + } + case PythonType::BUFFER: { + int32_t ref = checked_cast(arr).Value(index); + *result = wrap_buffer(blobs.buffers[ref]); + return Status::OK(); } + default: { ARROW_CHECK(false) << "union tag " << type << "' not recognized"; } } return Status::OK(); } -#define DESERIALIZE_SEQUENCE(CREATE_FN, SET_ITEM_FN) \ - const auto& data = checked_cast(array); \ - OwnedRef result(CREATE_FN(stop_idx - start_idx)); \ - const uint8_t* type_ids = data.raw_type_ids(); \ - const int32_t* value_offsets = data.raw_value_offsets(); \ - for (int64_t i = start_idx; i < stop_idx; ++i) { \ - if (data.IsNull(i)) { \ - Py_INCREF(Py_None); \ - SET_ITEM_FN(result.obj(), i - start_idx, Py_None); \ - } else { \ - int64_t offset = value_offsets[i]; \ - uint8_t type = type_ids[i]; \ - PyObject* value; \ - RETURN_NOT_OK(GetValue(context, data, *data.UnsafeChild(type), offset, type, base, \ - blobs, &value)); \ - SET_ITEM_FN(result.obj(), i - start_idx, value); \ - } \ - } \ - *out = result.detach(); \ - return Status::OK() - -Status DeserializeList(PyObject* 
context, const Array& array, int64_t start_idx, - int64_t stop_idx, PyObject* base, const SerializedPyObject& blobs, - PyObject** out) { - DESERIALIZE_SEQUENCE(PyList_New, PyList_SET_ITEM); -} - -Status DeserializeTuple(PyObject* context, const Array& array, int64_t start_idx, - int64_t stop_idx, PyObject* base, const SerializedPyObject& blobs, - PyObject** out) { - DESERIALIZE_SEQUENCE(PyTuple_New, PyTuple_SET_ITEM); +std::vector GetPythonTypes(const UnionArray& data) { + std::vector result; + auto type = data.type(); + for (int i = 0; i < type->num_children(); ++i) { + // stoi is locale dependent, but should be ok for small integers + result.push_back(static_cast(std::stoi(type->child(i)->name()))); + } + return result; } -Status DeserializeSet(PyObject* context, const Array& array, int64_t start_idx, - int64_t stop_idx, PyObject* base, const SerializedPyObject& blobs, - PyObject** out) { +template +Status DeserializeSequence(PyObject* context, const Array& array, int64_t start_idx, + int64_t stop_idx, PyObject* base, + const SerializedPyObject& blobs, + CreateSequenceFn&& create_sequence, SetItemFn&& set_item, + PyObject** out) { const auto& data = checked_cast(array); - OwnedRef result(PySet_New(nullptr)); + OwnedRef result(create_sequence(stop_idx - start_idx)); + RETURN_IF_PYERROR(); const uint8_t* type_ids = data.raw_type_ids(); const int32_t* value_offsets = data.raw_value_offsets(); + auto python_types = GetPythonTypes(data); for (int64_t i = start_idx; i < stop_idx; ++i) { if (data.IsNull(i)) { Py_INCREF(Py_None); - if (PySet_Add(result.obj(), Py_None) < 0) { - RETURN_IF_PYERROR(); - } + RETURN_NOT_OK(set_item(result.obj(), i - start_idx, Py_None)); } else { - int32_t offset = value_offsets[i]; - int8_t type = type_ids[i]; + int64_t offset = value_offsets[i]; + uint8_t type = type_ids[i]; PyObject* value; - RETURN_NOT_OK(GetValue(context, data, *data.UnsafeChild(type), offset, type, base, - blobs, &value)); - if (PySet_Add(result.obj(), value) < 0) { - RETURN_IF_PYERROR(); - } + RETURN_NOT_OK(GetValue(context, *data.UnsafeChild(type), offset, + python_types[type_ids[i]], base, blobs, &value)); + RETURN_NOT_OK(set_item(result.obj(), i - start_idx, value)); } } *out = result.detach(); return Status::OK(); } +Status DeserializeList(PyObject* context, const Array& array, int64_t start_idx, + int64_t stop_idx, PyObject* base, const SerializedPyObject& blobs, + PyObject** out) { + return DeserializeSequence(context, array, start_idx, stop_idx, base, blobs, + [](int64_t size) { return PyList_New(size); }, + [](PyObject* seq, int64_t index, PyObject* item) { + PyList_SET_ITEM(seq, index, item); + return Status::OK(); + }, + out); +} + +Status DeserializeTuple(PyObject* context, const Array& array, int64_t start_idx, + int64_t stop_idx, PyObject* base, const SerializedPyObject& blobs, + PyObject** out) { + return DeserializeSequence(context, array, start_idx, stop_idx, base, blobs, + [](int64_t size) { return PyTuple_New(size); }, + [](PyObject* seq, int64_t index, PyObject* item) { + PyTuple_SET_ITEM(seq, index, item); + return Status::OK(); + }, + out); +} + +Status DeserializeSet(PyObject* context, const Array& array, int64_t start_idx, + int64_t stop_idx, PyObject* base, const SerializedPyObject& blobs, + PyObject** out) { + return DeserializeSequence(context, array, start_idx, stop_idx, base, blobs, + [](int64_t size) { return PySet_New(nullptr); }, + [](PyObject* seq, int64_t index, PyObject* item) { + int err = PySet_Add(seq, item); + Py_DECREF(item); + if (err < 0) { + 
RETURN_IF_PYERROR(); + } + return Status::OK(); + }, + out); +} + Status ReadSerializedObject(io::RandomAccessFile* src, SerializedPyObject* out) { int64_t bytes_read; int32_t num_tensors; diff --git a/cpp/src/arrow/python/deserialize.h b/cpp/src/arrow/python/deserialize.h index 754765a6825fd..b9c4984a3b0e4 100644 --- a/cpp/src/arrow/python/deserialize.h +++ b/cpp/src/arrow/python/deserialize.h @@ -23,8 +23,8 @@ #include #include "arrow/python/serialize.h" +#include "arrow/python/visibility.h" #include "arrow/status.h" -#include "arrow/util/visibility.h" namespace arrow { @@ -43,7 +43,7 @@ namespace py { /// \param[in] src a RandomAccessFile /// \param[out] out the reconstructed data /// \return Status -ARROW_EXPORT +ARROW_PYTHON_EXPORT Status ReadSerializedObject(io::RandomAccessFile* src, SerializedPyObject* out); /// \brief Reconstruct SerializedPyObject from representation produced by @@ -56,7 +56,7 @@ Status ReadSerializedObject(io::RandomAccessFile* src, SerializedPyObject* out); /// num_tensors * 2 + num_buffers in length /// \param[out] out the reconstructed object /// \return Status -ARROW_EXPORT +ARROW_PYTHON_EXPORT Status GetSerializedFromComponents(int num_tensors, int num_ndarrays, int num_buffers, PyObject* data, SerializedPyObject* out); @@ -72,7 +72,7 @@ Status GetSerializedFromComponents(int num_tensors, int num_ndarrays, int num_bu /// \param[out] out The returned object /// \return Status /// This acquires the GIL -ARROW_EXPORT +ARROW_PYTHON_EXPORT Status DeserializeObject(PyObject* context, const SerializedPyObject& object, PyObject* base, PyObject** out); @@ -80,10 +80,10 @@ Status DeserializeObject(PyObject* context, const SerializedPyObject& object, /// \param[in] object Object to deserialize /// \param[out] out The deserialized tensor /// \return Status -ARROW_EXPORT +ARROW_PYTHON_EXPORT Status DeserializeNdarray(const SerializedPyObject& object, std::shared_ptr* out); -ARROW_EXPORT +ARROW_PYTHON_EXPORT Status NdarrayFromBuffer(std::shared_ptr src, std::shared_ptr* out); } // namespace py diff --git a/cpp/src/arrow/python/helpers.cc b/cpp/src/arrow/python/helpers.cc index 2f43db6505c67..28ed1a6c364dc 100644 --- a/cpp/src/arrow/python/helpers.cc +++ b/cpp/src/arrow/python/helpers.cc @@ -164,11 +164,10 @@ namespace { Status IntegerOverflowStatus(PyObject* obj, const std::string& overflow_message) { if (overflow_message.empty()) { - std::stringstream ss; std::string obj_as_stdstring; RETURN_NOT_OK(PyObject_StdStringStr(obj, &obj_as_stdstring)); - ss << "Value " << obj_as_stdstring << " too large to fit in C integer type"; - return Status::Invalid(ss.str()); + return Status::Invalid("Value ", obj_as_stdstring, + " too large to fit in C integer type"); } else { return Status::Invalid(overflow_message); } @@ -299,13 +298,10 @@ bool PandasObjectIsNull(PyObject* obj) { } Status InvalidValue(PyObject* obj, const std::string& why) { - std::stringstream ss; - std::string obj_as_str; RETURN_NOT_OK(internal::PyObject_StdStringStr(obj, &obj_as_str)); - ss << "Could not convert " << obj_as_str << " with type " << Py_TYPE(obj)->tp_name - << ": " << why; - return Status::Invalid(ss.str()); + return Status::Invalid("Could not convert ", obj_as_str, " with type ", + Py_TYPE(obj)->tp_name, ": ", why); } Status UnboxIntegerAsInt64(PyObject* obj, int64_t* out) { @@ -355,10 +351,8 @@ Status IntegerScalarToDoubleSafe(PyObject* obj, double* out) { constexpr int64_t kDoubleMin = -(1LL << 53); if (value < kDoubleMin || value > kDoubleMax) { - std::stringstream ss; - ss << "Integer value " << 
value << " is outside of the range exactly" - << " representable by a IEEE 754 double precision value"; - return Status::Invalid(ss.str()); + return Status::Invalid("Integer value ", value, " is outside of the range exactly", + " representable by a IEEE 754 double precision value"); } *out = static_cast(value); return Status::OK(); @@ -372,10 +366,8 @@ Status IntegerScalarToFloat32Safe(PyObject* obj, float* out) { constexpr int64_t kFloatMin = -(1LL << 24); if (value < kFloatMin || value > kFloatMax) { - std::stringstream ss; - ss << "Integer value " << value << " is outside of the range exactly" - << " representable by a IEEE 754 single precision value"; - return Status::Invalid(ss.str()); + return Status::Invalid("Integer value ", value, " is outside of the range exactly", + " representable by a IEEE 754 single precision value"); } *out = static_cast(value); return Status::OK(); diff --git a/cpp/src/arrow/python/helpers.h b/cpp/src/arrow/python/helpers.h index 4a7c8f12c15eb..2d44feea5ac81 100644 --- a/cpp/src/arrow/python/helpers.h +++ b/cpp/src/arrow/python/helpers.h @@ -27,9 +27,9 @@ #include +#include "arrow/python/visibility.h" #include "arrow/type.h" #include "arrow/util/macros.h" -#include "arrow/util/visibility.h" namespace arrow { @@ -40,20 +40,20 @@ class OwnedRef; // \brief Get an arrow DataType instance from Arrow's Type::type enum // \param[in] type One of the values of Arrow's Type::type enum // \return A shared pointer to DataType -ARROW_EXPORT std::shared_ptr GetPrimitiveType(Type::type type); +ARROW_PYTHON_EXPORT std::shared_ptr GetPrimitiveType(Type::type type); // \brief Construct a np.float16 object from a npy_half value. -ARROW_EXPORT PyObject* PyHalf_FromHalf(npy_half value); +ARROW_PYTHON_EXPORT PyObject* PyHalf_FromHalf(npy_half value); // \brief Convert a Python object to a npy_half value. -ARROW_EXPORT Status PyFloat_AsHalf(PyObject* obj, npy_half* out); +ARROW_PYTHON_EXPORT Status PyFloat_AsHalf(PyObject* obj, npy_half* out); namespace internal { // \brief Import a Python module // \param[in] module_name The name of the module // \param[out] ref The OwnedRef containing the module PyObject* -ARROW_EXPORT +ARROW_PYTHON_EXPORT Status ImportModule(const std::string& module_name, OwnedRef* ref); // \brief Import an object from a Python module @@ -61,7 +61,7 @@ Status ImportModule(const std::string& module_name, OwnedRef* ref); // \param[in] name The name of the object to import // \param[out] ref The OwnedRef containing the \c name attribute of the Python module \c // module -ARROW_EXPORT +ARROW_PYTHON_EXPORT Status ImportFromModule(const OwnedRef& module, const std::string& name, OwnedRef* ref); // \brief Check whether obj is an integer, independent of Python versions. 
@@ -74,11 +74,11 @@ inline bool IsPyInteger(PyObject* obj) { } // \brief Use pandas missing value semantics to check if a value is null -ARROW_EXPORT +ARROW_PYTHON_EXPORT bool PandasObjectIsNull(PyObject* obj); // \brief Check whether obj is a floating-point NaN -ARROW_EXPORT +ARROW_PYTHON_EXPORT bool PyFloat_IsNaN(PyObject* obj); inline bool IsPyBinary(PyObject* obj) { @@ -93,19 +93,19 @@ template Status CIntFromPython(PyObject* obj, Int* out, const std::string& overflow_message = ""); // \brief Convert a Python unicode string to a std::string -ARROW_EXPORT +ARROW_PYTHON_EXPORT Status PyUnicode_AsStdString(PyObject* obj, std::string* out); // \brief Convert a Python bytes object to a std::string -ARROW_EXPORT +ARROW_PYTHON_EXPORT std::string PyBytes_AsStdString(PyObject* obj); // \brief Call str() on the given object and return the result as a std::string -ARROW_EXPORT +ARROW_PYTHON_EXPORT Status PyObject_StdStringStr(PyObject* obj, std::string* out); // \brief Return the repr() of the given object (always succeeds) -ARROW_EXPORT +ARROW_PYTHON_EXPORT std::string PyObject_StdStringRepr(PyObject* obj); // \brief Cast the given size to int32_t, with error checking @@ -121,12 +121,12 @@ inline Status CastSize(Py_ssize_t size, int32_t* out, // \brief Print the Python object's __str__ form along with the passed error // message -ARROW_EXPORT +ARROW_PYTHON_EXPORT Status InvalidValue(PyObject* obj, const std::string& why); -ARROW_EXPORT +ARROW_PYTHON_EXPORT Status IntegerScalarToDoubleSafe(PyObject* obj, double* result); -ARROW_EXPORT +ARROW_PYTHON_EXPORT Status IntegerScalarToFloat32Safe(PyObject* obj, float* result); } // namespace internal diff --git a/cpp/src/arrow/python/inference.cc b/cpp/src/arrow/python/inference.cc index e619a64eb8aae..c9db5f4f28531 100644 --- a/cpp/src/arrow/python/inference.cc +++ b/cpp/src/arrow/python/inference.cc @@ -58,10 +58,9 @@ class NumPyDtypeUnifier { NumPyDtypeUnifier() : current_type_num_(-1), current_dtype_(NULLPTR) {} Status InvalidMix(int new_dtype) { - std::stringstream ss; - ss << "Cannot mix NumPy dtypes " << GetNumPyTypeName(current_type_num_) << " and " - << GetNumPyTypeName(new_dtype); - return Status::Invalid(ss.str()); + return Status::Invalid("Cannot mix NumPy dtypes ", + GetNumPyTypeName(current_type_num_), " and ", + GetNumPyTypeName(new_dtype)); } int Observe_BOOL(PyArray_Descr* descr, int dtype) { return INVALID; } @@ -250,9 +249,7 @@ class NumPyDtypeUnifier { action = Observe_DATETIME(descr); break; default: - std::stringstream ss; - ss << "Unsupported numpy type " << GetNumPyTypeName(dtype) << std::endl; - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("Unsupported numpy type ", GetNumPyTypeName(dtype)); } if (action == INVALID) { @@ -480,10 +477,8 @@ class TypeInferrer { } else if (PyBytes_Check(key_obj)) { key = internal::PyBytes_AsStdString(key_obj); } else { - std::stringstream ss; - ss << "Expected dict key of type str or bytes, got '" << Py_TYPE(key_obj)->tp_name - << "'"; - return Status::TypeError(ss.str()); + return Status::TypeError("Expected dict key of type str or bytes, got '", + Py_TYPE(key_obj)->tp_name, "'"); } // Get or create visitor for this key auto it = struct_inferrers_.find(key); @@ -583,13 +578,13 @@ Status InferArrowTypeAndSize(PyObject* obj, int64_t* size, return Status::OK(); } -ARROW_EXPORT +ARROW_PYTHON_EXPORT bool IsPyBool(PyObject* obj) { return internal::PyBoolScalar_Check(obj); } -ARROW_EXPORT +ARROW_PYTHON_EXPORT bool IsPyInt(PyObject* obj) { return 
internal::PyIntScalar_Check(obj); } -ARROW_EXPORT +ARROW_PYTHON_EXPORT bool IsPyFloat(PyObject* obj) { return internal::PyFloatScalar_Check(obj); } } // namespace py diff --git a/cpp/src/arrow/python/inference.h b/cpp/src/arrow/python/inference.h index 2cffa17ac2dc8..f2e2305e34441 100644 --- a/cpp/src/arrow/python/inference.h +++ b/cpp/src/arrow/python/inference.h @@ -27,9 +27,9 @@ #include #include +#include "arrow/python/visibility.h" #include "arrow/type.h" #include "arrow/util/macros.h" -#include "arrow/util/visibility.h" #include "arrow/python/common.h" @@ -41,23 +41,23 @@ class Status; namespace py { // These three functions take a sequence input, not arbitrary iterables -ARROW_EXPORT +ARROW_PYTHON_EXPORT arrow::Status InferArrowType(PyObject* obj, std::shared_ptr* out_type); -ARROW_EXPORT +ARROW_PYTHON_EXPORT arrow::Status InferArrowTypeAndSize(PyObject* obj, int64_t* size, std::shared_ptr* out_type); /// Checks whether the passed Python object is a boolean scalar -ARROW_EXPORT +ARROW_PYTHON_EXPORT bool IsPyBool(PyObject* obj); /// Checks whether the passed Python object is an integer scalar -ARROW_EXPORT +ARROW_PYTHON_EXPORT bool IsPyInt(PyObject* obj); /// Checks whether the passed Python object is a float scalar -ARROW_EXPORT +ARROW_PYTHON_EXPORT bool IsPyFloat(PyObject* obj); } // namespace py diff --git a/cpp/src/arrow/python/init.h b/cpp/src/arrow/python/init.h index 1daa5a3d2624d..34d19b21fdf31 100644 --- a/cpp/src/arrow/python/init.h +++ b/cpp/src/arrow/python/init.h @@ -19,10 +19,10 @@ #define ARROW_PYTHON_INIT_H #include "arrow/python/platform.h" -#include "arrow/util/visibility.h" +#include "arrow/python/visibility.h" extern "C" { -ARROW_EXPORT +ARROW_PYTHON_EXPORT int arrow_init_numpy(); } diff --git a/cpp/src/arrow/python/io.h b/cpp/src/arrow/python/io.h index 73d96f5f40fd8..d3b7c999eb8bb 100644 --- a/cpp/src/arrow/python/io.h +++ b/cpp/src/arrow/python/io.h @@ -22,7 +22,7 @@ #include "arrow/io/interfaces.h" #include "arrow/io/memory.h" -#include "arrow/util/visibility.h" +#include "arrow/python/visibility.h" #include "arrow/python/config.h" @@ -36,7 +36,7 @@ namespace py { class ARROW_NO_EXPORT PythonFile; -class ARROW_EXPORT PyReadableFile : public io::RandomAccessFile { +class ARROW_PYTHON_EXPORT PyReadableFile : public io::RandomAccessFile { public: explicit PyReadableFile(PyObject* file); ~PyReadableFile() override; @@ -64,7 +64,7 @@ class ARROW_EXPORT PyReadableFile : public io::RandomAccessFile { std::unique_ptr file_; }; -class ARROW_EXPORT PyOutputStream : public io::OutputStream { +class ARROW_PYTHON_EXPORT PyOutputStream : public io::OutputStream { public: explicit PyOutputStream(PyObject* file); ~PyOutputStream() override; @@ -87,7 +87,7 @@ class ARROW_EXPORT PyOutputStream : public io::OutputStream { // Keeping the reference in a Python wrapper would be incorrect as // the Python wrapper can get destroyed even though the wrapped C++ // buffer is still alive (ARROW-2270). 
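The arrow/python/visibility.h header that these hunks now include in place of arrow/util/visibility.h is not itself part of the diff. Given the PRIVATE ARROW_PYTHON_EXPORTING compile definition added in the CMake hunk earlier, a per-library export macro of this kind conventionally looks roughly like the sketch below; this is an assumption about its shape, not the header's actual contents:

// Assumed sketch of an export macro such as ARROW_PYTHON_EXPORT; the real
// arrow/python/visibility.h may differ in details (e.g. static-build handling).
#if defined(_WIN32) && !defined(ARROW_PYTHON_STATIC)
#  if defined(ARROW_PYTHON_EXPORTING)
#    define ARROW_PYTHON_EXPORT __declspec(dllexport)  // building libarrow_python
#  else
#    define ARROW_PYTHON_EXPORT __declspec(dllimport)  // linking against it
#  endif
#else
#  define ARROW_PYTHON_EXPORT __attribute__((visibility("default")))
#endif

Splitting this out from ARROW_EXPORT lets libarrow and libarrow_python control their exported symbols independently, which is what the blanket s/ARROW_EXPORT/ARROW_PYTHON_EXPORT/ substitution in these headers is preparing for.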
-class ARROW_EXPORT PyForeignBuffer : public Buffer { +class ARROW_PYTHON_EXPORT PyForeignBuffer : public Buffer { public: static Status Make(const uint8_t* data, int64_t size, PyObject* base, std::shared_ptr* out); diff --git a/cpp/src/arrow/python/numpy-internal.h b/cpp/src/arrow/python/numpy-internal.h index 463795a2109f0..6954e35c3e199 100644 --- a/cpp/src/arrow/python/numpy-internal.h +++ b/cpp/src/arrow/python/numpy-internal.h @@ -143,9 +143,8 @@ inline Status VisitNumpyArrayInline(PyArrayObject* arr, VISITOR* visitor) { TYPE_VISIT_INLINE(DATETIME); TYPE_VISIT_INLINE(OBJECT); } - std::stringstream ss; - ss << "NumPy type not implemented: " << GetNumPyTypeName(PyArray_TYPE(arr)); - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("NumPy type not implemented: ", + GetNumPyTypeName(PyArray_TYPE(arr))); } #undef TYPE_VISIT_INLINE diff --git a/cpp/src/arrow/python/numpy_convert.cc b/cpp/src/arrow/python/numpy_convert.cc index d95e337a4870d..c73e0bc15c9c5 100644 --- a/cpp/src/arrow/python/numpy_convert.cc +++ b/cpp/src/arrow/python/numpy_convert.cc @@ -92,9 +92,7 @@ Status GetTensorType(PyObject* dtype, std::shared_ptr* out) { TO_ARROW_TYPE_CASE(FLOAT32, float32); TO_ARROW_TYPE_CASE(FLOAT64, float64); default: { - std::stringstream ss; - ss << "Unsupported numpy type " << descr->type_num << std::endl; - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("Unsupported numpy type ", descr->type_num); } } return Status::OK(); @@ -119,9 +117,7 @@ Status GetNumPyType(const DataType& type, int* type_num) { NUMPY_TYPE_CASE(FLOAT, FLOAT32); NUMPY_TYPE_CASE(DOUBLE, FLOAT64); default: { - std::stringstream ss; - ss << "Unsupported tensor type: " << type.ToString() << std::endl; - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("Unsupported tensor type: ", type.ToString()); } } #undef NUMPY_TYPE_CASE @@ -181,9 +177,7 @@ Status NumPyDtypeToArrow(PyArray_Descr* descr, std::shared_ptr* out) { } } break; default: { - std::stringstream ss; - ss << "Unsupported numpy type " << descr->type_num << std::endl; - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("Unsupported numpy type ", descr->type_num); } } diff --git a/cpp/src/arrow/python/numpy_convert.h b/cpp/src/arrow/python/numpy_convert.h index dfdb1acd1237b..dce5fe522d65b 100644 --- a/cpp/src/arrow/python/numpy_convert.h +++ b/cpp/src/arrow/python/numpy_convert.h @@ -27,7 +27,7 @@ #include #include "arrow/buffer.h" -#include "arrow/util/visibility.h" +#include "arrow/python/visibility.h" namespace arrow { @@ -38,7 +38,7 @@ class Tensor; namespace py { -class ARROW_EXPORT NumPyBuffer : public Buffer { +class ARROW_PYTHON_EXPORT NumPyBuffer : public Buffer { public: explicit NumPyBuffer(PyObject* arr); virtual ~NumPyBuffer(); @@ -48,25 +48,25 @@ class ARROW_EXPORT NumPyBuffer : public Buffer { }; // Handle misbehaved types like LONGLONG and ULONGLONG -ARROW_EXPORT +ARROW_PYTHON_EXPORT int cast_npy_type_compat(int type_num); -ARROW_EXPORT +ARROW_PYTHON_EXPORT bool is_contiguous(PyObject* array); -ARROW_EXPORT +ARROW_PYTHON_EXPORT Status NumPyDtypeToArrow(PyObject* dtype, std::shared_ptr* out); -ARROW_EXPORT +ARROW_PYTHON_EXPORT Status NumPyDtypeToArrow(PyArray_Descr* descr, std::shared_ptr* out); Status GetTensorType(PyObject* dtype, std::shared_ptr* out); Status GetNumPyType(const DataType& type, int* type_num); -ARROW_EXPORT Status NdarrayToTensor(MemoryPool* pool, PyObject* ao, - std::shared_ptr* out); +ARROW_PYTHON_EXPORT Status NdarrayToTensor(MemoryPool* 
pool, PyObject* ao, + std::shared_ptr* out); -ARROW_EXPORT Status TensorToNdarray(const std::shared_ptr& tensor, PyObject* base, - PyObject** out); +ARROW_PYTHON_EXPORT Status TensorToNdarray(const std::shared_ptr& tensor, + PyObject* base, PyObject** out); } // namespace py } // namespace arrow diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc b/cpp/src/arrow/python/numpy_to_arrow.cc index 37141d7642b6f..a944b80914189 100644 --- a/cpp/src/arrow/python/numpy_to_arrow.cc +++ b/cpp/src/arrow/python/numpy_to_arrow.cc @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,8 @@ #include "arrow/util/checked_cast.h" #include "arrow/util/logging.h" #include "arrow/util/macros.h" +#include "arrow/util/string.h" +#include "arrow/util/utf8.h" #include "arrow/visitor_inline.h" #include "arrow/compute/context.h" @@ -60,6 +63,7 @@ namespace arrow { using internal::checked_cast; using internal::CopyBitmap; +using internal::GenerateBitsUnrolled; namespace py { @@ -243,6 +247,11 @@ class NumPyConverter { return Status::OK(); } + // Called before ConvertData to ensure Numpy input buffer is in expected + // Arrow layout + template + Status PrepareInputData(std::shared_ptr* data); + // ---------------------------------------------------------------------- // Traditional visitor conversion for non-object arrays @@ -280,9 +289,8 @@ class NumPyConverter { } Status TypeNotImplemented(std::string type_name) { - std::stringstream ss; - ss << "NumPyConverter doesn't implement <" << type_name << "> conversion. "; - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("NumPyConverter doesn't implement <", type_name, + "> conversion. "); } MemoryPool* pool_; @@ -405,57 +413,49 @@ Status CopyStridedArray(PyArrayObject* arr, const int64_t length, MemoryPool* po } // namespace template -inline Status NumPyConverter::ConvertData(std::shared_ptr* data) { +inline Status NumPyConverter::PrepareInputData(std::shared_ptr* data) { if (is_strided()) { RETURN_NOT_OK(CopyStridedArray(arr_, length_, pool_, data)); + } else if (dtype_->type_num == NPY_BOOL) { + int64_t nbytes = BitUtil::BytesForBits(length_); + std::shared_ptr buffer; + RETURN_NOT_OK(AllocateBuffer(pool_, nbytes, &buffer)); + + Ndarray1DIndexer values(arr_); + int64_t i = 0; + const auto generate = [&values, &i]() -> bool { return values[i++] > 0; }; + GenerateBitsUnrolled(buffer->mutable_data(), 0, length_, generate); + + *data = buffer; } else { // Can zero-copy *data = std::make_shared(reinterpret_cast(arr_)); } - std::shared_ptr input_type; - RETURN_NOT_OK(NumPyDtypeToArrow(reinterpret_cast(dtype_), &input_type)); - - if (!input_type->Equals(*type_)) { - RETURN_NOT_OK(CastBuffer(input_type, *data, length_, nullptr, 0, type_, cast_options_, - pool_, data)); - } - return Status::OK(); } -template <> -inline Status NumPyConverter::ConvertData(std::shared_ptr* data) { - int64_t nbytes = BitUtil::BytesForBits(length_); - std::shared_ptr buffer; - RETURN_NOT_OK(AllocateBuffer(pool_, nbytes, &buffer)); - - Ndarray1DIndexer values(arr_); +template +inline Status NumPyConverter::ConvertData(std::shared_ptr* data) { + RETURN_NOT_OK(PrepareInputData(data)); - uint8_t* bitmap = buffer->mutable_data(); + std::shared_ptr input_type; + RETURN_NOT_OK(NumPyDtypeToArrow(reinterpret_cast(dtype_), &input_type)); - memset(bitmap, 0, nbytes); - for (int i = 0; i < length_; ++i) { - if (values[i] > 0) { - BitUtil::SetBit(bitmap, i); - } + if (!input_type->Equals(*type_)) { + RETURN_NOT_OK(CastBuffer(input_type, *data, 
length_, null_bitmap_, null_count_, type_, + cast_options_, pool_, data)); } - *data = buffer; return Status::OK(); } template <> inline Status NumPyConverter::ConvertData(std::shared_ptr* data) { - if (is_strided()) { - RETURN_NOT_OK(CopyStridedArray(arr_, length_, pool_, data)); - } else { - // Can zero-copy - *data = std::make_shared(reinterpret_cast(arr_)); - } - std::shared_ptr input_type; + RETURN_NOT_OK(PrepareInputData(data)); + auto date_dtype = reinterpret_cast(dtype_->c_metadata); if (dtype_->type_num == NPY_DATETIME) { // If we have inbound datetime64[D] data, this needs to be downcasted @@ -477,8 +477,8 @@ inline Status NumPyConverter::ConvertData(std::shared_ptr* d } else { RETURN_NOT_OK(NumPyDtypeToArrow(reinterpret_cast(dtype_), &input_type)); if (!input_type->Equals(*type_)) { - RETURN_NOT_OK(CastBuffer(input_type, *data, length_, nullptr, 0, type_, - cast_options_, pool_, data)); + RETURN_NOT_OK(CastBuffer(input_type, *data, length_, null_bitmap_, null_count_, + type_, cast_options_, pool_, data)); } } @@ -487,17 +487,11 @@ inline Status NumPyConverter::ConvertData(std::shared_ptr* d template <> inline Status NumPyConverter::ConvertData(std::shared_ptr* data) { - if (is_strided()) { - RETURN_NOT_OK(CopyStridedArray(arr_, length_, pool_, data)); - } else { - // Can zero-copy - *data = std::make_shared(reinterpret_cast(arr_)); - } - constexpr int64_t kMillisecondsInDay = 86400000; - std::shared_ptr input_type; + RETURN_NOT_OK(PrepareInputData(data)); + auto date_dtype = reinterpret_cast(dtype_->c_metadata); if (dtype_->type_num == NPY_DATETIME) { // If we have inbound datetime64[D] data, this needs to be downcasted @@ -524,62 +518,61 @@ inline Status NumPyConverter::ConvertData(std::shared_ptr* d } else { RETURN_NOT_OK(NumPyDtypeToArrow(reinterpret_cast(dtype_), &input_type)); if (!input_type->Equals(*type_)) { - RETURN_NOT_OK(CastBuffer(input_type, *data, length_, nullptr, 0, type_, - cast_options_, pool_, data)); + RETURN_NOT_OK(CastBuffer(input_type, *data, length_, null_bitmap_, null_count_, + type_, cast_options_, pool_, data)); } } return Status::OK(); } +// Create 16MB chunks for binary data +constexpr int32_t kBinaryChunksize = 1 << 24; + Status NumPyConverter::Visit(const BinaryType& type) { - BinaryBuilder builder(pool_); + ::arrow::internal::ChunkedBinaryBuilder builder(kBinaryChunksize, pool_); auto data = reinterpret_cast(PyArray_DATA(arr_)); - int item_length = 0; + auto AppendNotNull = [&builder, this](const uint8_t* data) { + // This is annoying. NumPy allows strings to have nul-terminators, so + // we must check for them here + const size_t item_size = + strnlen(reinterpret_cast(data), static_cast(itemsize_)); + return builder.Append(data, static_cast(item_size)); + }; + if (mask_ != nullptr) { Ndarray1DIndexer mask_values(mask_); for (int64_t i = 0; i < length_; ++i) { if (mask_values[i]) { RETURN_NOT_OK(builder.AppendNull()); } else { - // This is annoying. 
NumPy allows strings to have nul-terminators, so - // we must check for them here - for (item_length = 0; item_length < itemsize_; ++item_length) { - if (data[item_length] == 0) { - break; - } - } - RETURN_NOT_OK(builder.Append(data, item_length)); + RETURN_NOT_OK(AppendNotNull(data)); } data += stride_; } } else { for (int64_t i = 0; i < length_; ++i) { - for (item_length = 0; item_length < itemsize_; ++item_length) { - // Look for nul-terminator - if (data[item_length] == 0) { - break; - } - } - RETURN_NOT_OK(builder.Append(data, item_length)); + RETURN_NOT_OK(AppendNotNull(data)); data += stride_; } } - std::shared_ptr result; + ArrayVector result; RETURN_NOT_OK(builder.Finish(&result)); - return PushArray(result->data()); + for (auto arr : result) { + RETURN_NOT_OK(PushArray(arr->data())); + } + return Status::OK(); } Status NumPyConverter::Visit(const FixedSizeBinaryType& type) { auto byte_width = type.byte_width(); if (itemsize_ != byte_width) { - std::stringstream ss; - ss << "Got bytestring of length " << itemsize_ << " (expected " << byte_width << ")"; - return Status::Invalid(ss.str()); + return Status::Invalid("Got bytestring of length ", itemsize_, " (expected ", + byte_width, ")"); } FixedSizeBinaryBuilder builder(::arrow::fixed_size_binary(byte_width), pool_); @@ -634,30 +627,47 @@ Status AppendUTF32(const char* data, int itemsize, int byteorder, } // namespace Status NumPyConverter::Visit(const StringType& type) { + util::InitializeUTF8(); + StringBuilder builder(pool_); - auto data = reinterpret_cast(PyArray_DATA(arr_)); + auto data = reinterpret_cast(PyArray_DATA(arr_)); - char numpy_byteorder = PyArray_DESCR(arr_)->byteorder; + char numpy_byteorder = dtype_->byteorder; // For Python C API, -1 is little-endian, 1 is big-endian int byteorder = numpy_byteorder == '>' ? 
1 : -1; PyAcquireGIL gil_lock; + const bool is_binary_type = dtype_->type_num == NPY_STRING; + + auto AppendNonNullValue = [&](const uint8_t* data) { + if (is_binary_type) { + if (ARROW_PREDICT_TRUE(util::ValidateUTF8(data, itemsize_))) { + return builder.Append(data, itemsize_); + } else { + return Status::Invalid("Encountered non-UTF8 binary value: ", + HexEncode(data, itemsize_)); + } + } else { + return AppendUTF32(reinterpret_cast(data), itemsize_, byteorder, + &builder); + } + }; if (mask_ != nullptr) { Ndarray1DIndexer mask_values(mask_); for (int64_t i = 0; i < length_; ++i) { if (mask_values[i]) { RETURN_NOT_OK(builder.AppendNull()); } else { - RETURN_NOT_OK(AppendUTF32(data, itemsize_, byteorder, &builder)); + RETURN_NOT_OK(AppendNonNullValue(data)); } data += stride_; } } else { for (int64_t i = 0; i < length_; ++i) { - RETURN_NOT_OK(AppendUTF32(data, itemsize_, byteorder, &builder)); + RETURN_NOT_OK(AppendNonNullValue(data)); data += stride_; } } @@ -682,9 +692,7 @@ Status NumPyConverter::Visit(const StructType& type) { for (auto field : type.children()) { PyObject* tup = PyDict_GetItemString(dtype_->fields, field->name().c_str()); if (tup == NULL) { - std::stringstream ss; - ss << "Missing field '" << field->name() << "' in struct array"; - return Status::TypeError(ss.str()); + return Status::TypeError("Missing field '", field->name(), "' in struct array"); } PyArray_Descr* sub_dtype = reinterpret_cast(PyTuple_GET_ITEM(tup, 0)); diff --git a/cpp/src/arrow/python/numpy_to_arrow.h b/cpp/src/arrow/python/numpy_to_arrow.h index 5e1c088264a46..4edc7669bb82e 100644 --- a/cpp/src/arrow/python/numpy_to_arrow.h +++ b/cpp/src/arrow/python/numpy_to_arrow.h @@ -25,7 +25,7 @@ #include #include "arrow/compute/kernels/cast.h" -#include "arrow/util/visibility.h" +#include "arrow/python/visibility.h" namespace arrow { @@ -48,7 +48,7 @@ namespace py { /// \param[in] type a specific type to cast to, may be null /// \param[in] cast_options casting options /// \param[out] out a ChunkedArray, to accommodate chunked output -ARROW_EXPORT +ARROW_PYTHON_EXPORT Status NdarrayToArrow(MemoryPool* pool, PyObject* ao, PyObject* mo, bool from_pandas, const std::shared_ptr& type, const compute::CastOptions& cast_options, @@ -64,7 +64,7 @@ Status NdarrayToArrow(MemoryPool* pool, PyObject* ao, PyObject* mo, bool from_pa /// whether values are null /// \param[in] type a specific type to cast to, may be null /// \param[out] out a ChunkedArray, to accommodate chunked output -ARROW_EXPORT +ARROW_PYTHON_EXPORT Status NdarrayToArrow(MemoryPool* pool, PyObject* ao, PyObject* mo, bool from_pandas, const std::shared_ptr& type, std::shared_ptr* out); diff --git a/cpp/src/arrow/python/platform.h b/cpp/src/arrow/python/platform.h index 4dc944e40900e..ca9b553fd641a 100644 --- a/cpp/src/arrow/python/platform.h +++ b/cpp/src/arrow/python/platform.h @@ -26,8 +26,10 @@ #include // Work around C2528 error +#ifdef _MSC_VER #if _MSC_VER >= 1900 #undef timezone #endif +#endif #endif // ARROW_PYTHON_PLATFORM_H diff --git a/cpp/src/arrow/python/pyarrow.h b/cpp/src/arrow/python/pyarrow.h index e637627006177..a5a3910847977 100644 --- a/cpp/src/arrow/python/pyarrow.h +++ b/cpp/src/arrow/python/pyarrow.h @@ -22,7 +22,7 @@ #include -#include "arrow/util/visibility.h" +#include "arrow/python/visibility.h" namespace arrow { @@ -39,44 +39,46 @@ class Tensor; namespace py { -ARROW_EXPORT int import_pyarrow(); +ARROW_PYTHON_EXPORT int import_pyarrow(); -ARROW_EXPORT bool is_buffer(PyObject* buffer); -ARROW_EXPORT Status unwrap_buffer(PyObject* 
buffer, std::shared_ptr<Buffer>* out);
-ARROW_EXPORT PyObject* wrap_buffer(const std::shared_ptr<Buffer>& buffer);
+ARROW_PYTHON_EXPORT bool is_buffer(PyObject* buffer);
+ARROW_PYTHON_EXPORT Status unwrap_buffer(PyObject* buffer, std::shared_ptr<Buffer>* out);
+ARROW_PYTHON_EXPORT PyObject* wrap_buffer(const std::shared_ptr<Buffer>& buffer);

-ARROW_EXPORT bool is_data_type(PyObject* data_type);
-ARROW_EXPORT Status unwrap_data_type(PyObject* data_type, std::shared_ptr<DataType>* out);
-ARROW_EXPORT PyObject* wrap_data_type(const std::shared_ptr<DataType>& type);
+ARROW_PYTHON_EXPORT bool is_data_type(PyObject* data_type);
+ARROW_PYTHON_EXPORT Status unwrap_data_type(PyObject* data_type,
+                                            std::shared_ptr<DataType>* out);
+ARROW_PYTHON_EXPORT PyObject* wrap_data_type(const std::shared_ptr<DataType>& type);

-ARROW_EXPORT bool is_field(PyObject* field);
-ARROW_EXPORT Status unwrap_field(PyObject* field, std::shared_ptr<Field>* out);
-ARROW_EXPORT PyObject* wrap_field(const std::shared_ptr<Field>& field);
+ARROW_PYTHON_EXPORT bool is_field(PyObject* field);
+ARROW_PYTHON_EXPORT Status unwrap_field(PyObject* field, std::shared_ptr<Field>* out);
+ARROW_PYTHON_EXPORT PyObject* wrap_field(const std::shared_ptr<Field>& field);

-ARROW_EXPORT bool is_schema(PyObject* schema);
-ARROW_EXPORT Status unwrap_schema(PyObject* schema, std::shared_ptr<Schema>* out);
-ARROW_EXPORT PyObject* wrap_schema(const std::shared_ptr<Schema>& schema);
+ARROW_PYTHON_EXPORT bool is_schema(PyObject* schema);
+ARROW_PYTHON_EXPORT Status unwrap_schema(PyObject* schema, std::shared_ptr<Schema>* out);
+ARROW_PYTHON_EXPORT PyObject* wrap_schema(const std::shared_ptr<Schema>& schema);

-ARROW_EXPORT bool is_array(PyObject* array);
-ARROW_EXPORT Status unwrap_array(PyObject* array, std::shared_ptr<Array>* out);
-ARROW_EXPORT PyObject* wrap_array(const std::shared_ptr<Array>& array);
+ARROW_PYTHON_EXPORT bool is_array(PyObject* array);
+ARROW_PYTHON_EXPORT Status unwrap_array(PyObject* array, std::shared_ptr<Array>* out);
+ARROW_PYTHON_EXPORT PyObject* wrap_array(const std::shared_ptr<Array>& array);

-ARROW_EXPORT bool is_tensor(PyObject* tensor);
-ARROW_EXPORT Status unwrap_tensor(PyObject* tensor, std::shared_ptr<Tensor>* out);
-ARROW_EXPORT PyObject* wrap_tensor(const std::shared_ptr<Tensor>& tensor);
+ARROW_PYTHON_EXPORT bool is_tensor(PyObject* tensor);
+ARROW_PYTHON_EXPORT Status unwrap_tensor(PyObject* tensor, std::shared_ptr<Tensor>* out);
+ARROW_PYTHON_EXPORT PyObject* wrap_tensor(const std::shared_ptr<Tensor>& tensor);

-ARROW_EXPORT bool is_column(PyObject* column);
-ARROW_EXPORT Status unwrap_column(PyObject* column, std::shared_ptr<Column>* out);
-ARROW_EXPORT PyObject* wrap_column(const std::shared_ptr<Column>& column);
+ARROW_PYTHON_EXPORT bool is_column(PyObject* column);
+ARROW_PYTHON_EXPORT Status unwrap_column(PyObject* column, std::shared_ptr<Column>* out);
+ARROW_PYTHON_EXPORT PyObject* wrap_column(const std::shared_ptr<Column>& column);

-ARROW_EXPORT bool is_table(PyObject* table);
-ARROW_EXPORT Status unwrap_table(PyObject* table, std::shared_ptr<Table>* out);
-ARROW_EXPORT PyObject* wrap_table(const std::shared_ptr<Table>& table);
+ARROW_PYTHON_EXPORT bool is_table(PyObject* table);
+ARROW_PYTHON_EXPORT Status unwrap_table(PyObject* table, std::shared_ptr<Table>* out);
+ARROW_PYTHON_EXPORT PyObject* wrap_table(const std::shared_ptr<Table>
& table); -ARROW_EXPORT bool is_record_batch(PyObject* batch); -ARROW_EXPORT Status unwrap_record_batch(PyObject* batch, - std::shared_ptr* out); -ARROW_EXPORT PyObject* wrap_record_batch(const std::shared_ptr& batch); +ARROW_PYTHON_EXPORT bool is_record_batch(PyObject* batch); +ARROW_PYTHON_EXPORT Status unwrap_record_batch(PyObject* batch, + std::shared_ptr* out); +ARROW_PYTHON_EXPORT PyObject* wrap_record_batch( + const std::shared_ptr& batch); } // namespace py } // namespace arrow diff --git a/cpp/src/arrow/python/python-test.cc b/cpp/src/arrow/python/python-test.cc index 2d15ce45b3b7f..7443c54845630 100644 --- a/cpp/src/arrow/python/python-test.cc +++ b/cpp/src/arrow/python/python-test.cc @@ -25,6 +25,7 @@ #include "arrow/builder.h" #include "arrow/table.h" #include "arrow/test-util.h" +#include "arrow/util/decimal.h" #include "arrow/python/arrow_to_pandas.h" #include "arrow/python/decimal.h" diff --git a/cpp/src/arrow/python/python_to_arrow.cc b/cpp/src/arrow/python/python_to_arrow.cc index a77cebc7e7d50..f5e6a5776071d 100644 --- a/cpp/src/arrow/python/python_to_arrow.cc +++ b/cpp/src/arrow/python/python_to_arrow.cc @@ -402,10 +402,7 @@ class TimestampConverter : public TypedConverter type; RETURN_NOT_OK(NumPyDtypeToArrow(PyArray_DescrFromScalar(obj), &type)); if (type->id() != Type::TIMESTAMP) { - std::ostringstream ss; - ss << "Expected np.datetime64 but got: "; - ss << type->ToString(); - return Status::Invalid(ss.str()); + return Status::Invalid("Expected np.datetime64 but got: ", type->ToString()); } const TimestampType& ttype = checked_cast(*type); if (unit_ != ttype.unit()) { @@ -705,10 +702,7 @@ Status ListConverter::AppendNdarrayItem(PyObject* obj) { return value_converter_->AppendSingleVirtual(obj); } default: { - std::stringstream ss; - ss << "Unknown list item type: "; - ss << value_type_->ToString(); - return Status::TypeError(ss.str()); + return Status::TypeError("Unknown list item type: ", value_type_->ToString()); } } } @@ -911,9 +905,8 @@ Status GetConverter(const std::shared_ptr& type, bool from_pandas, new StructConverter(from_pandas, strict_conversions)); break; default: - std::stringstream ss; - ss << "Sequence converter for type " << type->ToString() << " not implemented"; - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("Sequence converter for type ", type->ToString(), + " not implemented"); } return Status::OK(); } diff --git a/cpp/src/arrow/python/python_to_arrow.h b/cpp/src/arrow/python/python_to_arrow.h index d133089f97f51..f9d97569ef47a 100644 --- a/cpp/src/arrow/python/python_to_arrow.h +++ b/cpp/src/arrow/python/python_to_arrow.h @@ -26,9 +26,9 @@ #include #include +#include "arrow/python/visibility.h" #include "arrow/type.h" #include "arrow/util/macros.h" -#include "arrow/util/visibility.h" #include "arrow/python/common.h" @@ -68,12 +68,12 @@ struct PyConversionOptions { /// \param[in] options various conversion options /// \param[out] out a ChunkedArray containing one or more chunks /// \return Status -ARROW_EXPORT +ARROW_PYTHON_EXPORT Status ConvertPySequence(PyObject* obj, PyObject* mask, const PyConversionOptions& options, std::shared_ptr* out); -ARROW_EXPORT +ARROW_PYTHON_EXPORT Status ConvertPySequence(PyObject* obj, const PyConversionOptions& options, std::shared_ptr* out); diff --git a/cpp/src/arrow/python/serialize.cc b/cpp/src/arrow/python/serialize.cc index 7911557ee73e0..4dd4c04a6ccb5 100644 --- a/cpp/src/arrow/python/serialize.cc +++ b/cpp/src/arrow/python/serialize.cc @@ -29,6 +29,7 @@ #include #include 
"arrow/array.h" +#include "arrow/array/builder_union.h" #include "arrow/builder.h" #include "arrow/io/interfaces.h" #include "arrow/io/memory.h" @@ -55,249 +56,176 @@ using internal::checked_cast; namespace py { -/// A Sequence is a heterogeneous collections of elements. It can contain -/// scalar Python types, lists, tuples, dictionaries and tensors. +class SequenceBuilder; +class DictBuilder; + +Status Append(PyObject* context, PyObject* elem, SequenceBuilder* builder, + int32_t recursion_depth, SerializedPyObject* blobs_out); + +// A Sequence is a heterogeneous collections of elements. It can contain +// scalar Python types, lists, tuples, dictionaries and tensors. class SequenceBuilder { public: explicit SequenceBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT) : pool_(pool), types_(::arrow::int8(), pool), offsets_(::arrow::int32(), pool), - nones_(pool), - bools_(::arrow::boolean(), pool), - ints_(::arrow::int64(), pool), - py2_ints_(::arrow::int64(), pool), - bytes_(::arrow::binary(), pool), - strings_(pool), - half_floats_(::arrow::float16(), pool), - floats_(::arrow::float32(), pool), - doubles_(::arrow::float64(), pool), - date64s_(::arrow::date64(), pool), - tensor_indices_(::arrow::int32(), pool), - ndarray_indices_(::arrow::int32(), pool), - buffer_indices_(::arrow::int32(), pool), - list_offsets_({0}), - tuple_offsets_({0}), - dict_offsets_({0}), - set_offsets_({0}) {} - - /// Appending a none to the sequence - Status AppendNone() { - RETURN_NOT_OK(offsets_.Append(0)); - RETURN_NOT_OK(types_.Append(0)); - return nones_.AppendNull(); - } - - Status Update(int64_t offset, int8_t* tag) { - if (*tag == -1) { - *tag = num_tags_++; - } + type_map_(PythonType::NUM_PYTHON_TYPES, -1) { + builder_.reset(new DenseUnionBuilder(pool)); + } + + // Appending a none to the sequence + Status AppendNone() { return builder_->AppendNull(); } + + template + Status Update(BuilderType* child_builder, int8_t tag) { int32_t offset32 = -1; - RETURN_NOT_OK(internal::CastSize(offset, &offset32)); + RETURN_NOT_OK(internal::CastSize(child_builder->length(), &offset32)); DCHECK_GE(offset32, 0); - RETURN_NOT_OK(offsets_.Append(offset32)); - RETURN_NOT_OK(types_.Append(*tag)); - return nones_.Append(true); + return builder_->Append(tag, offset32); + } + + template + Status CreateAndUpdate(std::shared_ptr* child_builder, int8_t tag, + MakeBuilderFn make_builder) { + if (!*child_builder) { + child_builder->reset(make_builder()); + // std::to_string is locale dependent, but should be ok for small integers + type_map_[tag] = builder_->AppendChild(*child_builder, std::to_string(tag)); + } + return Update(child_builder->get(), type_map_[tag]); } template - Status AppendPrimitive(const T val, int8_t* tag, BuilderType* out) { - RETURN_NOT_OK(Update(out->length(), tag)); - return out->Append(val); + Status AppendPrimitive(std::shared_ptr* child_builder, const T val, + int8_t tag) { + RETURN_NOT_OK( + CreateAndUpdate(child_builder, tag, [this]() { return new BuilderType(pool_); })); + return (*child_builder)->Append(val); } - /// Appending a boolean to the sequence + // Appending a boolean to the sequence Status AppendBool(const bool data) { - return AppendPrimitive(data, &bool_tag_, &bools_); + return AppendPrimitive(&bools_, data, PythonType::BOOL); } - /// Appending a python 2 int64_t to the sequence + // Appending a python 2 int64_t to the sequence Status AppendPy2Int64(const int64_t data) { - return AppendPrimitive(data, &py2_int_tag_, &py2_ints_); + return AppendPrimitive(&py2_ints_, data, PythonType::PY2INT); 
-  /// Appending an int64_t to the sequence
+  // Appending an int64_t to the sequence
   Status AppendInt64(const int64_t data) {
-    return AppendPrimitive(data, &int_tag_, &ints_);
+    return AppendPrimitive(&ints_, data, PythonType::INT);
   }

-  /// Append a list of bytes to the sequence
+  // Append a list of bytes to the sequence
   Status AppendBytes(const uint8_t* data, int32_t length) {
-    RETURN_NOT_OK(Update(bytes_.length(), &bytes_tag_));
-    return bytes_.Append(data, length);
+    RETURN_NOT_OK(CreateAndUpdate(&bytes_, PythonType::BYTES,
+                                  [this]() { return new BinaryBuilder(pool_); }));
+    return bytes_->Append(data, length);
   }

-  /// Appending a string to the sequence
+  // Appending a string to the sequence
   Status AppendString(const char* data, int32_t length) {
-    RETURN_NOT_OK(Update(strings_.length(), &string_tag_));
-    return strings_.Append(data, length);
+    RETURN_NOT_OK(CreateAndUpdate(&strings_, PythonType::STRING,
+                                  [this]() { return new StringBuilder(pool_); }));
+    return strings_->Append(data, length);
   }

-  /// Appending a half_float to the sequence
+  // Appending a half_float to the sequence
   Status AppendHalfFloat(const npy_half data) {
-    return AppendPrimitive(data, &half_float_tag_, &half_floats_);
+    return AppendPrimitive(&half_floats_, data, PythonType::HALF_FLOAT);
   }

-  /// Appending a float to the sequence
+  // Appending a float to the sequence
   Status AppendFloat(const float data) {
-    return AppendPrimitive(data, &float_tag_, &floats_);
+    return AppendPrimitive(&floats_, data, PythonType::FLOAT);
   }

-  /// Appending a double to the sequence
+  // Appending a double to the sequence
   Status AppendDouble(const double data) {
-    return AppendPrimitive(data, &double_tag_, &doubles_);
+    return AppendPrimitive(&doubles_, data, PythonType::DOUBLE);
   }

-  /// Appending a Date64 timestamp to the sequence
+  // Appending a Date64 timestamp to the sequence
   Status AppendDate64(const int64_t timestamp) {
-    return AppendPrimitive(timestamp, &date64_tag_, &date64s_);
+    return AppendPrimitive(&date64s_, timestamp, PythonType::DATE64);
   }

-  /// Appending a tensor to the sequence
-  ///
-  /// \param tensor_index Index of the tensor in the object.
+  // Appending a tensor to the sequence
+  //
+  // \param tensor_index Index of the tensor in the object.
   Status AppendTensor(const int32_t tensor_index) {
-    RETURN_NOT_OK(Update(tensor_indices_.length(), &tensor_tag_));
-    return tensor_indices_.Append(tensor_index);
+    RETURN_NOT_OK(CreateAndUpdate(&tensor_indices_, PythonType::TENSOR,
+                                  [this]() { return new Int32Builder(pool_); }));
+    return tensor_indices_->Append(tensor_index);
   }

-  /// Appending a numpy ndarray to the sequence
-  ///
-  /// \param tensor_index Index of the tensor in the object.
+  // Appending a numpy ndarray to the sequence
+  //
+  // \param ndarray_index Index of the ndarray in the object.
   Status AppendNdarray(const int32_t ndarray_index) {
-    RETURN_NOT_OK(Update(ndarray_indices_.length(), &ndarray_tag_));
-    return ndarray_indices_.Append(ndarray_index);
+    RETURN_NOT_OK(CreateAndUpdate(&ndarray_indices_, PythonType::NDARRAY,
+                                  [this]() { return new Int32Builder(pool_); }));
+    return ndarray_indices_->Append(ndarray_index);
   }

-  /// Appending a buffer to the sequence
-  ///
-  /// \param buffer_index Indes of the buffer in the object.
+  // Appending a buffer to the sequence
+  //
+  // \param buffer_index Index of the buffer in the object.
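+  //
+  // Editorial aside (hedged, not part of the original patch): buffers travel
+  // out-of-band in the SerializedPyObject rather than through the union, so a
+  // caller appends only an index, mirroring SerializeNdarray below, e.g.
+  //   RETURN_NOT_OK(builder.AppendBuffer(static_cast<int32_t>(n_buffers)));
+  // where n_buffers is an illustrative count of the buffers collected so far.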
Status AppendBuffer(const int32_t buffer_index) { - RETURN_NOT_OK(Update(buffer_indices_.length(), &buffer_tag_)); - return buffer_indices_.Append(buffer_index); - } - - /// Add a sublist to the sequence. The data contained in the sublist will be - /// specified in the "Finish" method. - /// - /// To construct l = [[11, 22], 33, [44, 55]] you would for example run - /// list = ListBuilder(); - /// list.AppendList(2); - /// list.Append(33); - /// list.AppendList(2); - /// list.Finish([11, 22, 44, 55]); - /// list.Finish(); - - /// \param size - /// The size of the sublist - Status AppendList(Py_ssize_t size) { - int32_t offset; - RETURN_NOT_OK(internal::CastSize(list_offsets_.back() + size, &offset)); - RETURN_NOT_OK(Update(list_offsets_.size() - 1, &list_tag_)); - list_offsets_.push_back(offset); - return Status::OK(); + RETURN_NOT_OK(CreateAndUpdate(&buffer_indices_, PythonType::BUFFER, + [this]() { return new Int32Builder(pool_); })); + return buffer_indices_->Append(buffer_index); + } + + Status AppendSequence(PyObject* context, PyObject* sequence, int8_t tag, + std::shared_ptr& target_sequence, + std::unique_ptr& values, int32_t recursion_depth, + SerializedPyObject* blobs_out) { + if (recursion_depth >= kMaxRecursionDepth) { + return Status::NotImplemented( + "This object exceeds the maximum recursion depth. It may contain itself " + "recursively."); + } + RETURN_NOT_OK(CreateAndUpdate(&target_sequence, tag, [this, &values]() { + values.reset(new SequenceBuilder(pool_)); + return new ListBuilder(pool_, values->builder()); + })); + RETURN_NOT_OK(target_sequence->Append()); + return internal::VisitIterable( + sequence, [&](PyObject* obj, bool* keep_going /* unused */) { + return Append(context, obj, values.get(), recursion_depth, blobs_out); + }); } - Status AppendTuple(Py_ssize_t size) { - int32_t offset; - RETURN_NOT_OK(internal::CastSize(tuple_offsets_.back() + size, &offset)); - RETURN_NOT_OK(Update(tuple_offsets_.size() - 1, &tuple_tag_)); - tuple_offsets_.push_back(offset); - return Status::OK(); + Status AppendList(PyObject* context, PyObject* list, int32_t recursion_depth, + SerializedPyObject* blobs_out) { + return AppendSequence(context, list, PythonType::LIST, lists_, list_values_, + recursion_depth, blobs_out); } - Status AppendDict(Py_ssize_t size) { - int32_t offset; - RETURN_NOT_OK(internal::CastSize(dict_offsets_.back() + size, &offset)); - RETURN_NOT_OK(Update(dict_offsets_.size() - 1, &dict_tag_)); - dict_offsets_.push_back(offset); - return Status::OK(); + Status AppendTuple(PyObject* context, PyObject* tuple, int32_t recursion_depth, + SerializedPyObject* blobs_out) { + return AppendSequence(context, tuple, PythonType::TUPLE, tuples_, tuple_values_, + recursion_depth, blobs_out); } - Status AppendSet(Py_ssize_t size) { - int32_t offset; - RETURN_NOT_OK(internal::CastSize(set_offsets_.back() + size, &offset)); - RETURN_NOT_OK(Update(set_offsets_.size() - 1, &set_tag_)); - set_offsets_.push_back(offset); - return Status::OK(); + Status AppendSet(PyObject* context, PyObject* set, int32_t recursion_depth, + SerializedPyObject* blobs_out) { + return AppendSequence(context, set, PythonType::SET, sets_, set_values_, + recursion_depth, blobs_out); } - template - Status AddElement(const int8_t tag, BuilderType* out, const std::string& name = "") { - if (tag != -1) { - fields_[tag] = ::arrow::field(name, out->type()); - RETURN_NOT_OK(out->Finish(&children_[tag])); - RETURN_NOT_OK(nones_.Append(true)); - type_ids_.push_back(tag); - } - return Status::OK(); - } + Status 
AppendDict(PyObject* context, PyObject* dict, int32_t recursion_depth, + SerializedPyObject* blobs_out); - Status AddSubsequence(int8_t tag, const Array* data, - const std::vector& offsets, const std::string& name) { - if (data != nullptr) { - DCHECK(data->length() == offsets.back()); - std::shared_ptr offset_array; - Int32Builder builder(::arrow::int32(), pool_); - RETURN_NOT_OK(builder.AppendValues(offsets.data(), offsets.size())); - RETURN_NOT_OK(builder.Finish(&offset_array)); - std::shared_ptr list_array; - RETURN_NOT_OK(ListArray::FromArrays(*offset_array, *data, pool_, &list_array)); - auto field = ::arrow::field(name, list_array->type()); - auto type = ::arrow::struct_({field}); - fields_[tag] = ::arrow::field("", type); - children_[tag] = std::shared_ptr( - new StructArray(type, list_array->length(), {list_array})); - RETURN_NOT_OK(nones_.Append(true)); - type_ids_.push_back(tag); - } else { - DCHECK_EQ(offsets.size(), 1); - } - return Status::OK(); - } + // Finish building the sequence and return the result. + // Input arrays may be nullptr + Status Finish(std::shared_ptr* out) { return builder_->Finish(out); } - /// Finish building the sequence and return the result. - /// Input arrays may be nullptr - Status Finish(const Array* list_data, const Array* tuple_data, const Array* dict_data, - const Array* set_data, std::shared_ptr* out) { - fields_.resize(num_tags_); - children_.resize(num_tags_); - - RETURN_NOT_OK(AddElement(bool_tag_, &bools_)); - RETURN_NOT_OK(AddElement(int_tag_, &ints_)); - RETURN_NOT_OK(AddElement(py2_int_tag_, &py2_ints_, "py2_int")); - RETURN_NOT_OK(AddElement(string_tag_, &strings_)); - RETURN_NOT_OK(AddElement(bytes_tag_, &bytes_)); - RETURN_NOT_OK(AddElement(half_float_tag_, &half_floats_)); - RETURN_NOT_OK(AddElement(float_tag_, &floats_)); - RETURN_NOT_OK(AddElement(double_tag_, &doubles_)); - RETURN_NOT_OK(AddElement(date64_tag_, &date64s_)); - RETURN_NOT_OK(AddElement(tensor_tag_, &tensor_indices_, "tensor")); - RETURN_NOT_OK(AddElement(buffer_tag_, &buffer_indices_, "buffer")); - RETURN_NOT_OK(AddElement(ndarray_tag_, &ndarray_indices_, "ndarray")); - - RETURN_NOT_OK(AddSubsequence(list_tag_, list_data, list_offsets_, "list")); - RETURN_NOT_OK(AddSubsequence(tuple_tag_, tuple_data, tuple_offsets_, "tuple")); - RETURN_NOT_OK(AddSubsequence(dict_tag_, dict_data, dict_offsets_, "dict")); - RETURN_NOT_OK(AddSubsequence(set_tag_, set_data, set_offsets_, "set")); - - std::shared_ptr types_array; - RETURN_NOT_OK(types_.Finish(&types_array)); - const auto& types = checked_cast(*types_array); - - std::shared_ptr offsets_array; - RETURN_NOT_OK(offsets_.Finish(&offsets_array)); - const auto& offsets = checked_cast(*offsets_array); - - std::shared_ptr nones_array; - RETURN_NOT_OK(nones_.Finish(&nones_array)); - const auto& nones = checked_cast(*nones_array); - - auto type = ::arrow::union_(fields_, type_ids_, UnionMode::DENSE); - out->reset(new UnionArray(type, types.length(), children_, types.values(), - offsets.values(), nones.null_bitmap(), nones.null_count())); - return Status::OK(); - } + std::shared_ptr builder() { return builder_; } private: MemoryPool* pool_; @@ -305,112 +233,108 @@ class SequenceBuilder { Int8Builder types_; Int32Builder offsets_; - BooleanBuilder nones_; - BooleanBuilder bools_; - Int64Builder ints_; - Int64Builder py2_ints_; - BinaryBuilder bytes_; - StringBuilder strings_; - HalfFloatBuilder half_floats_; - FloatBuilder floats_; - DoubleBuilder doubles_; - Date64Builder date64s_; - - Int32Builder tensor_indices_; - Int32Builder 
ndarray_indices_; - Int32Builder buffer_indices_; - - std::vector list_offsets_; - std::vector tuple_offsets_; - std::vector dict_offsets_; - std::vector set_offsets_; - - // Tags for members of the sequence. If they are set to -1 it means - // they are not used and will not part be of the metadata when we call - // SequenceBuilder::Finish. If a member with one of the tags is added, - // the associated variable gets a unique index starting from 0. This - // happens in the UPDATE macro in sequence.cc. - int8_t bool_tag_ = -1; - int8_t int_tag_ = -1; - int8_t py2_int_tag_ = -1; - int8_t string_tag_ = -1; - int8_t bytes_tag_ = -1; - int8_t half_float_tag_ = -1; - int8_t float_tag_ = -1; - int8_t double_tag_ = -1; - int8_t date64_tag_ = -1; - - int8_t tensor_tag_ = -1; - int8_t buffer_tag_ = -1; - int8_t ndarray_tag_ = -1; - int8_t list_tag_ = -1; - int8_t tuple_tag_ = -1; - int8_t dict_tag_ = -1; - int8_t set_tag_ = -1; - - int8_t num_tags_ = 0; - - // Members for the output union constructed in Finish - std::vector> fields_; - std::vector> children_; - std::vector type_ids_; + /// Mapping from PythonType to child index + std::vector type_map_; + + std::shared_ptr bools_; + std::shared_ptr ints_; + std::shared_ptr py2_ints_; + std::shared_ptr bytes_; + std::shared_ptr strings_; + std::shared_ptr half_floats_; + std::shared_ptr floats_; + std::shared_ptr doubles_; + std::shared_ptr date64s_; + + std::unique_ptr list_values_; + std::shared_ptr lists_; + std::unique_ptr dict_values_; + std::shared_ptr dicts_; + std::unique_ptr tuple_values_; + std::shared_ptr tuples_; + std::unique_ptr set_values_; + std::shared_ptr sets_; + + std::shared_ptr tensor_indices_; + std::shared_ptr ndarray_indices_; + std::shared_ptr buffer_indices_; + + std::shared_ptr builder_; }; -/// Constructing dictionaries of key/value pairs. Sequences of -/// keys and values are built separately using a pair of -/// SequenceBuilders. The resulting Arrow representation -/// can be obtained via the Finish method. +// Constructing dictionaries of key/value pairs. Sequences of +// keys and values are built separately using a pair of +// SequenceBuilders. The resulting Arrow representation +// can be obtained via the Finish method. class DictBuilder { public: - explicit DictBuilder(MemoryPool* pool = nullptr) : keys_(pool), vals_(pool) {} + explicit DictBuilder(MemoryPool* pool = nullptr) : keys_(pool), vals_(pool) { + builder_.reset(new StructBuilder(nullptr, pool, {keys_.builder(), vals_.builder()})); + } - /// Builder for the keys of the dictionary + // Builder for the keys of the dictionary SequenceBuilder& keys() { return keys_; } - /// Builder for the values of the dictionary + // Builder for the values of the dictionary SequenceBuilder& vals() { return vals_; } - /// Construct an Arrow StructArray representing the dictionary. - /// Contains a field "keys" for the keys and "vals" for the values. 
- /// \param val_list_data - /// List containing the data from nested lists in the value - /// list of the dictionary - /// - /// \param val_dict_data - /// List containing the data from nested dictionaries in the - /// value list of the dictionary - Status Finish(const Array* key_tuple_data, const Array* key_dict_data, - const Array* val_list_data, const Array* val_tuple_data, - const Array* val_dict_data, const Array* val_set_data, - std::shared_ptr* out) { - // lists and sets can't be keys of dicts in Python, that is why for - // the keys we do not need to collect sublists - std::shared_ptr keys, vals; - RETURN_NOT_OK(keys_.Finish(nullptr, key_tuple_data, key_dict_data, nullptr, &keys)); - RETURN_NOT_OK( - vals_.Finish(val_list_data, val_tuple_data, val_dict_data, val_set_data, &vals)); - auto keys_field = std::make_shared("keys", keys->type()); - auto vals_field = std::make_shared("vals", vals->type()); - auto type = std::make_shared( - std::vector>({keys_field, vals_field})); - std::vector> field_arrays({keys, vals}); - DCHECK(keys->length() == vals->length()); - out->reset(new StructArray(type, keys->length(), field_arrays)); - return Status::OK(); - } + // Construct an Arrow StructArray representing the dictionary. + // Contains a field "keys" for the keys and "vals" for the values. + Status Finish(std::shared_ptr* out) { return builder_->Finish(out); } + + std::shared_ptr builder() { return builder_; } private: SequenceBuilder keys_; SequenceBuilder vals_; + std::shared_ptr builder_; }; +Status SequenceBuilder::AppendDict(PyObject* context, PyObject* dict, + int32_t recursion_depth, + SerializedPyObject* blobs_out) { + if (recursion_depth >= kMaxRecursionDepth) { + return Status::NotImplemented( + "This object exceeds the maximum recursion depth. It may contain itself " + "recursively."); + } + RETURN_NOT_OK(CreateAndUpdate(&dicts_, PythonType::DICT, [this]() { + dict_values_.reset(new DictBuilder(pool_)); + return new ListBuilder(pool_, dict_values_->builder()); + })); + RETURN_NOT_OK(dicts_->Append()); + PyObject* key; + PyObject* value; + Py_ssize_t pos = 0; + while (PyDict_Next(dict, &pos, &key, &value)) { + RETURN_NOT_OK(dict_values_->builder()->Append()); + RETURN_NOT_OK( + Append(context, key, &dict_values_->keys(), recursion_depth + 1, blobs_out)); + RETURN_NOT_OK( + Append(context, value, &dict_values_->vals(), recursion_depth + 1, blobs_out)); + } + + // This block is used to decrement the reference counts of the results + // returned by the serialization callback, which is called in AppendArray, + // in DeserializeDict and in Append + static PyObject* py_type = PyUnicode_FromString("_pytype_"); + if (PyDict_Contains(dict, py_type)) { + // If the dictionary contains the key "_pytype_", then the user has to + // have registered a callback. 
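+    // Editorial note (added in review, not part of the original patch): the
+    // Py_XDECREF below pairs with the serialized_object returned by
+    // CallSerializeCallback in Append and AppendArray; those call sites hand
+    // the dict to AppendDict, which releases it here once its contents have
+    // been appended.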
+ if (context == Py_None) { + return Status::Invalid("No serialization callback set"); + } + Py_XDECREF(dict); + } + return Status::OK(); +} + Status CallCustomCallback(PyObject* context, PyObject* method_name, PyObject* elem, PyObject** result) { *result = NULL; if (context == Py_None) { - std::stringstream ss; - ss << "error while calling callback on " << internal::PyObject_StdStringRepr(elem) - << ": handler not registered"; - return Status::SerializationError(ss.str()); + return Status::SerializationError("error while calling callback on ", + internal::PyObject_StdStringRepr(elem), + ": handler not registered"); } else { *result = PyObject_CallMethodObjArgs(context, method_name, elem, NULL); return PassPyError(); @@ -434,16 +358,8 @@ Status CallDeserializeCallback(PyObject* context, PyObject* value, return CallCustomCallback(context, method_name.obj(), value, deserialized_object); } -Status SerializeDict(PyObject* context, std::vector dicts, - int32_t recursion_depth, std::shared_ptr* out, - SerializedPyObject* blobs_out); - -Status SerializeArray(PyObject* context, PyArrayObject* array, SequenceBuilder* builder, - std::vector* subdicts, SerializedPyObject* blobs_out); - -Status SerializeSequences(PyObject* context, std::vector sequences, - int32_t recursion_depth, std::shared_ptr* out, - SerializedPyObject* blobs_out); +Status AppendArray(PyObject* context, PyArrayObject* array, SequenceBuilder* builder, + int32_t recursion_depth, SerializedPyObject* blobs_out); template Status AppendIntegerScalar(PyObject* obj, SequenceBuilder* builder) { @@ -503,9 +419,7 @@ Status AppendScalar(PyObject* obj, SequenceBuilder* builder) { } Status Append(PyObject* context, PyObject* elem, SequenceBuilder* builder, - std::vector* sublists, std::vector* subtuples, - std::vector* subdicts, std::vector* subsets, - SerializedPyObject* blobs_out) { + int32_t recursion_depth, SerializedPyObject* blobs_out) { // The bool case must precede the int case (PyInt_Check passes for bools) if (PyBool_Check(elem)) { RETURN_NOT_OK(builder->AppendBool(elem == Py_True)); @@ -524,8 +438,8 @@ Status Append(PyObject* context, PyObject* elem, SequenceBuilder* builder, PyObject* serialized_object; // The reference count of serialized_object will be decremented in SerializeDict RETURN_NOT_OK(CallSerializeCallback(context, elem, &serialized_object)); - RETURN_NOT_OK(builder->AppendDict(PyDict_Size(serialized_object))); - subdicts->push_back(serialized_object); + RETURN_NOT_OK( + builder->AppendDict(context, serialized_object, recursion_depth, blobs_out)); } #if PY_MAJOR_VERSION < 3 } else if (PyInt_Check(elem)) { @@ -533,32 +447,28 @@ Status Append(PyObject* context, PyObject* elem, SequenceBuilder* builder, #endif } else if (PyBytes_Check(elem)) { auto data = reinterpret_cast(PyBytes_AS_STRING(elem)); - int32_t size; + int32_t size = -1; RETURN_NOT_OK(internal::CastSize(PyBytes_GET_SIZE(elem), &size)); RETURN_NOT_OK(builder->AppendBytes(data, size)); } else if (PyUnicode_Check(elem)) { PyBytesView view; RETURN_NOT_OK(view.FromString(elem)); - int32_t size; + int32_t size = -1; RETURN_NOT_OK(internal::CastSize(view.size, &size)); RETURN_NOT_OK(builder->AppendString(view.bytes, size)); } else if (PyList_CheckExact(elem)) { - RETURN_NOT_OK(builder->AppendList(PyList_Size(elem))); - sublists->push_back(elem); + RETURN_NOT_OK(builder->AppendList(context, elem, recursion_depth, blobs_out)); } else if (PyDict_CheckExact(elem)) { - RETURN_NOT_OK(builder->AppendDict(PyDict_Size(elem))); - subdicts->push_back(elem); + 
RETURN_NOT_OK(builder->AppendDict(context, elem, recursion_depth, blobs_out)); } else if (PyTuple_CheckExact(elem)) { - RETURN_NOT_OK(builder->AppendTuple(PyTuple_Size(elem))); - subtuples->push_back(elem); + RETURN_NOT_OK(builder->AppendTuple(context, elem, recursion_depth, blobs_out)); } else if (PySet_Check(elem)) { - RETURN_NOT_OK(builder->AppendSet(PySet_Size(elem))); - subsets->push_back(elem); + RETURN_NOT_OK(builder->AppendSet(context, elem, recursion_depth, blobs_out)); } else if (PyArray_IsScalar(elem, Generic)) { RETURN_NOT_OK(AppendScalar(elem, builder)); } else if (PyArray_CheckExact(elem)) { - RETURN_NOT_OK(SerializeArray(context, reinterpret_cast(elem), builder, - subdicts, blobs_out)); + RETURN_NOT_OK(AppendArray(context, reinterpret_cast(elem), builder, + recursion_depth, blobs_out)); } else if (elem == Py_None) { RETURN_NOT_OK(builder->AppendNone()); } else if (PyDateTime_Check(elem)) { @@ -579,14 +489,14 @@ Status Append(PyObject* context, PyObject* elem, SequenceBuilder* builder, PyObject* serialized_object; // The reference count of serialized_object will be decremented in SerializeDict RETURN_NOT_OK(CallSerializeCallback(context, elem, &serialized_object)); - RETURN_NOT_OK(builder->AppendDict(PyDict_Size(serialized_object))); - subdicts->push_back(serialized_object); + RETURN_NOT_OK( + builder->AppendDict(context, serialized_object, recursion_depth, blobs_out)); } return Status::OK(); } -Status SerializeArray(PyObject* context, PyArrayObject* array, SequenceBuilder* builder, - std::vector* subdicts, SerializedPyObject* blobs_out) { +Status AppendArray(PyObject* context, PyArrayObject* array, SequenceBuilder* builder, + int32_t recursion_depth, SerializedPyObject* blobs_out) { int dtype = PyArray_TYPE(array); switch (dtype) { case NPY_UINT8: @@ -612,129 +522,13 @@ Status SerializeArray(PyObject* context, PyArrayObject* array, SequenceBuilder* // The reference count of serialized_object will be decremented in SerializeDict RETURN_NOT_OK(CallSerializeCallback(context, reinterpret_cast(array), &serialized_object)); - RETURN_NOT_OK(builder->AppendDict(PyDict_Size(serialized_object))); - subdicts->push_back(serialized_object); + RETURN_NOT_OK(builder->AppendDict(context, serialized_object, recursion_depth + 1, + blobs_out)); } } return Status::OK(); } -Status SerializeSequences(PyObject* context, std::vector sequences, - int32_t recursion_depth, std::shared_ptr* out, - SerializedPyObject* blobs_out) { - DCHECK(out); - if (recursion_depth >= kMaxRecursionDepth) { - return Status::NotImplemented( - "This object exceeds the maximum recursion depth. 
It may contain itself " - "recursively."); - } - SequenceBuilder builder; - std::vector sublists, subtuples, subdicts, subsets; - for (const auto& sequence : sequences) { - RETURN_NOT_OK(internal::VisitIterable( - sequence, [&](PyObject* obj, bool* keep_going /* unused */) { - return Append(context, obj, &builder, &sublists, &subtuples, &subdicts, - &subsets, blobs_out); - })); - } - std::shared_ptr list; - if (sublists.size() > 0) { - RETURN_NOT_OK( - SerializeSequences(context, sublists, recursion_depth + 1, &list, blobs_out)); - } - std::shared_ptr tuple; - if (subtuples.size() > 0) { - RETURN_NOT_OK( - SerializeSequences(context, subtuples, recursion_depth + 1, &tuple, blobs_out)); - } - std::shared_ptr dict; - if (subdicts.size() > 0) { - RETURN_NOT_OK( - SerializeDict(context, subdicts, recursion_depth + 1, &dict, blobs_out)); - } - std::shared_ptr set; - if (subsets.size() > 0) { - RETURN_NOT_OK( - SerializeSequences(context, subsets, recursion_depth + 1, &set, blobs_out)); - } - return builder.Finish(list.get(), tuple.get(), dict.get(), set.get(), out); -} - -Status SerializeDict(PyObject* context, std::vector dicts, - int32_t recursion_depth, std::shared_ptr* out, - SerializedPyObject* blobs_out) { - DictBuilder result; - if (recursion_depth >= kMaxRecursionDepth) { - return Status::NotImplemented( - "This object exceeds the maximum recursion depth. It may contain itself " - "recursively."); - } - std::vector key_tuples, key_dicts, val_lists, val_tuples, val_dicts, - val_sets, dummy; - for (const auto& dict : dicts) { - PyObject* key; - PyObject* value; - Py_ssize_t pos = 0; - while (PyDict_Next(dict, &pos, &key, &value)) { - RETURN_NOT_OK(Append(context, key, &result.keys(), &dummy, &key_tuples, &key_dicts, - &dummy, blobs_out)); - DCHECK_EQ(dummy.size(), 0); - RETURN_NOT_OK(Append(context, value, &result.vals(), &val_lists, &val_tuples, - &val_dicts, &val_sets, blobs_out)); - } - } - std::shared_ptr key_tuples_arr; - if (key_tuples.size() > 0) { - RETURN_NOT_OK(SerializeSequences(context, key_tuples, recursion_depth + 1, - &key_tuples_arr, blobs_out)); - } - std::shared_ptr key_dicts_arr; - if (key_dicts.size() > 0) { - RETURN_NOT_OK(SerializeDict(context, key_dicts, recursion_depth + 1, &key_dicts_arr, - blobs_out)); - } - std::shared_ptr val_list_arr; - if (val_lists.size() > 0) { - RETURN_NOT_OK(SerializeSequences(context, val_lists, recursion_depth + 1, - &val_list_arr, blobs_out)); - } - std::shared_ptr val_tuples_arr; - if (val_tuples.size() > 0) { - RETURN_NOT_OK(SerializeSequences(context, val_tuples, recursion_depth + 1, - &val_tuples_arr, blobs_out)); - } - std::shared_ptr val_dict_arr; - if (val_dicts.size() > 0) { - RETURN_NOT_OK( - SerializeDict(context, val_dicts, recursion_depth + 1, &val_dict_arr, blobs_out)); - } - std::shared_ptr val_set_arr; - if (val_sets.size() > 0) { - RETURN_NOT_OK(SerializeSequences(context, val_sets, recursion_depth + 1, &val_set_arr, - blobs_out)); - } - RETURN_NOT_OK(result.Finish(key_tuples_arr.get(), key_dicts_arr.get(), - val_list_arr.get(), val_tuples_arr.get(), - val_dict_arr.get(), val_set_arr.get(), out)); - - // This block is used to decrement the reference counts of the results - // returned by the serialization callback, which is called in SerializeArray, - // in DeserializeDict and in Append - static PyObject* py_type = PyUnicode_FromString("_pytype_"); - for (const auto& dict : dicts) { - if (PyDict_Contains(dict, py_type)) { - // If the dictionary contains the key "_pytype_", then the user has to - // have registered a 
callback. - if (context == Py_None) { - return Status::Invalid("No serialization callback set"); - } - Py_XDECREF(dict); - } - } - - return Status::OK(); -} - std::shared_ptr MakeBatch(std::shared_ptr data) { auto field = std::make_shared("list", data->type()); auto schema = ::arrow::schema({field}); @@ -745,9 +539,13 @@ Status SerializeObject(PyObject* context, PyObject* sequence, SerializedPyObject PyAcquireGIL lock; PyDateTime_IMPORT; import_pyarrow(); - std::vector sequences = {sequence}; + SequenceBuilder builder; + RETURN_NOT_OK(internal::VisitIterable( + sequence, [&](PyObject* obj, bool* keep_going /* unused */) { + return Append(context, obj, &builder, 0, out); + })); std::shared_ptr array; - RETURN_NOT_OK(SerializeSequences(context, sequences, 0, &array, out)); + RETURN_NOT_OK(builder.Finish(&array)); out->batch = MakeBatch(array); return Status::OK(); } @@ -757,7 +555,7 @@ Status SerializeNdarray(std::shared_ptr tensor, SerializedPyObject* out) SequenceBuilder builder; RETURN_NOT_OK(builder.AppendNdarray(static_cast(out->ndarrays.size()))); out->ndarrays.push_back(tensor); - RETURN_NOT_OK(builder.Finish(nullptr, nullptr, nullptr, nullptr, &array)); + RETURN_NOT_OK(builder.Finish(&array)); out->batch = MakeBatch(array); return Status::OK(); } diff --git a/cpp/src/arrow/python/serialize.h b/cpp/src/arrow/python/serialize.h index 2759d0c9f1fb5..6cdbbe5053f04 100644 --- a/cpp/src/arrow/python/serialize.h +++ b/cpp/src/arrow/python/serialize.h @@ -21,8 +21,8 @@ #include #include +#include "arrow/python/visibility.h" #include "arrow/status.h" -#include "arrow/util/visibility.h" // Forward declaring PyObject, see // https://mail.python.org/pipermail/python-dev/2003-August/037601.html @@ -47,7 +47,7 @@ class OutputStream; namespace py { -struct ARROW_EXPORT SerializedPyObject { +struct ARROW_PYTHON_EXPORT SerializedPyObject { std::shared_ptr batch; std::vector> tensors; std::vector> ndarrays; @@ -86,14 +86,14 @@ struct ARROW_EXPORT SerializedPyObject { /// \return Status /// /// Release GIL before calling -ARROW_EXPORT +ARROW_PYTHON_EXPORT Status SerializeObject(PyObject* context, PyObject* sequence, SerializedPyObject* out); /// \brief Serialize an Arrow Tensor as a SerializedPyObject. /// \param[in] tensor Tensor to be serialized /// \param[out] out The serialized representation /// \return Status -ARROW_EXPORT +ARROW_PYTHON_EXPORT Status SerializeTensor(std::shared_ptr tensor, py::SerializedPyObject* out); /// \brief Write the Tensor metadata header to an OutputStream. 
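Editorial aside: taken together, the declarations above admit a minimal usage
sketch like the one below. This is an illustration only, not code from the
patch: `obj` is assumed to be a borrowed PyObject* for, say, a Python list,
interpreter and GIL handling follow SerializeObject itself, and error handling
is reduced to RETURN_NOT_OK inside a Status-returning function.

// Hedged sketch: split a Python object into its Arrow components.
arrow::py::SerializedPyObject payload;
RETURN_NOT_OK(arrow::py::SerializeObject(/*context=*/Py_None, obj, &payload));
// Scalars, strings and nested containers land in a one-column record batch
// built from the dense union assembled in serialize.cc ...
std::shared_ptr<arrow::RecordBatch> batch = payload.batch;
// ... while tensors and ndarrays are carried out-of-band:
const size_t num_ndarrays = payload.ndarrays.size();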
@@ -102,11 +102,33 @@
 /// \param[in] tensor_num_bytes The length of the Tensor data in bytes
 /// \param[in] dst The OutputStream to write the Tensor header to
 /// \return Status
-ARROW_EXPORT
+ARROW_PYTHON_EXPORT
 Status WriteNdarrayHeader(std::shared_ptr<DataType> dtype,
                           const std::vector<int64_t>& shape, int64_t tensor_num_bytes,
                           io::OutputStream* dst);

+struct PythonType {
+  enum type {
+    BOOL,
+    INT,
+    PY2INT,
+    BYTES,
+    STRING,
+    HALF_FLOAT,
+    FLOAT,
+    DOUBLE,
+    DATE64,
+    LIST,
+    DICT,
+    TUPLE,
+    SET,
+    TENSOR,
+    NDARRAY,
+    BUFFER,
+    NUM_PYTHON_TYPES
+  };
+};
+
 }  // namespace py
 }  // namespace arrow
diff --git a/cpp/src/arrow/python/type_traits.h b/cpp/src/arrow/python/type_traits.h
index d90517a60a28a..bc71ec4e90bd0 100644
--- a/cpp/src/arrow/python/type_traits.h
+++ b/cpp/src/arrow/python/type_traits.h
@@ -149,6 +149,7 @@ template <>
 struct arrow_traits<BooleanType> {
   static constexpr int npy_type = NPY_BOOL;
   static constexpr bool supports_nulls = false;
+  typedef typename npy_traits<NPY_BOOL>::value_type T;
 };

 #define INT_DECL(TYPE) \
diff --git a/cpp/src/arrow/python/util/CMakeLists.txt b/cpp/src/arrow/python/util/CMakeLists.txt
index 8edde12558fd8..30c75ef4509a3 100644
--- a/cpp/src/arrow/python/util/CMakeLists.txt
+++ b/cpp/src/arrow/python/util/CMakeLists.txt
@@ -25,13 +25,13 @@
 if (PYARROW_BUILD_TESTS)
   if (APPLE)
     target_link_libraries(arrow_python_test_main
-      gtest_static
+      ${GTEST_LIBRARY}
       dl)
     set_target_properties(arrow_python_test_main
       PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
   else()
     target_link_libraries(arrow_python_test_main
-      gtest_static
+      ${GTEST_LIBRARY}
       pthread
       dl
     )
diff --git a/cpp/src/arrow/python/util/datetime.h b/cpp/src/arrow/python/util/datetime.h
index 7350deadcc67f..dc462972c57b7 100644
--- a/cpp/src/arrow/python/util/datetime.h
+++ b/cpp/src/arrow/python/util/datetime.h
@@ -199,9 +199,7 @@ static inline Status PyTime_convert_int(int64_t val, const TimeUnit::type unit,
   switch (unit) {
     case TimeUnit::NANO:
       if (val % 1000 != 0) {
-        std::stringstream ss;
-        ss << "Value " << val << " has non-zero nanoseconds";
-        return Status::Invalid(ss.str());
+        return Status::Invalid("Value ", val, " has non-zero nanoseconds");
       }
       val /= 1000;
       // fall through
diff --git a/cpp/src/arrow/python/visibility.h b/cpp/src/arrow/python/visibility.h
new file mode 100644
index 0000000000000..c0b343c70e976
--- /dev/null
+++ b/cpp/src/arrow/python/visibility.h
@@ -0,0 +1,39 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
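+
+// Editorial summary (added in review, not in the original file): the macro
+// below resolves to __declspec(dllexport) while building the arrow_python
+// shared library (ARROW_PYTHON_EXPORTING defined), to __declspec(dllimport)
+// for Windows consumers, to nothing for static builds (ARROW_STATIC), and to
+// __attribute__((visibility("default"))) on non-Windows toolchains. Usage,
+// as elsewhere in this patch:
+//   class ARROW_PYTHON_EXPORT NumPyBuffer : public Buffer { ... };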
+
+#pragma once
+
+#if defined(_WIN32) || defined(__CYGWIN__)  // Windows
+#if defined(_MSC_VER)
+#pragma warning(disable : 4251)
+#else
+#pragma GCC diagnostic ignored "-Wattributes"
+#endif
+
+#ifdef ARROW_STATIC
+#define ARROW_PYTHON_EXPORT
+#elif defined(ARROW_PYTHON_EXPORTING)
+#define ARROW_PYTHON_EXPORT __declspec(dllexport)
+#else
+#define ARROW_PYTHON_EXPORT __declspec(dllimport)
+#endif
+
+#else  // Not Windows
+#ifndef ARROW_PYTHON_EXPORT
+#define ARROW_PYTHON_EXPORT __attribute__((visibility("default")))
+#endif
+#endif  // Non-Windows
diff --git a/cpp/src/arrow/record_batch.cc b/cpp/src/arrow/record_batch.cc
index f295b864c0066..baaf5cb17500f 100644
--- a/cpp/src/arrow/record_batch.cc
+++ b/cpp/src/arrow/record_batch.cc
@@ -26,6 +26,7 @@
 #include "arrow/array.h"
 #include "arrow/status.h"
+#include "arrow/table.h"
 #include "arrow/type.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/stl.h"
@@ -94,16 +95,13 @@ class SimpleRecordBatch : public RecordBatch {
   DCHECK(column != nullptr);

   if (!field->type()->Equals(column->type())) {
-    std::stringstream ss;
-    ss << "Column data type " << field->type()->name()
-       << " does not match field data type " << column->type()->name();
-    return Status::Invalid(ss.str());
+    return Status::Invalid("Column data type ", field->type()->name(),
+                           " does not match field data type ", column->type()->name());
   }
   if (column->length() != num_rows_) {
-    std::stringstream ss;
-    ss << "Added column's length must match record batch's length. Expected length "
-       << num_rows_ << " but got length " << column->length();
-    return Status::Invalid(ss.str());
+    return Status::Invalid(
+        "Added column's length must match record batch's length. Expected length ",
+        num_rows_, " but got length ", column->length());
   }

   std::shared_ptr<Schema> new_schema;
@@ -228,17 +226,14 @@ Status RecordBatch::Validate() const {
   auto arr_shared = this->column_data(i);
   const ArrayData& arr = *arr_shared;
   if (arr.length != num_rows_) {
-    std::stringstream ss;
-    ss << "Number of rows in column " << i << " did not match batch: " << arr.length
-       << " vs " << num_rows_;
-    return Status::Invalid(ss.str());
+    return Status::Invalid("Number of rows in column ", i,
+                           " did not match batch: ", arr.length, " vs ", num_rows_);
   }
   const auto& schema_type = *schema_->field(i)->type();
   if (!arr.type->Equals(schema_type)) {
-    std::stringstream ss;
-    ss << "Column " << i << " type not match schema: " << arr.type->ToString() << " vs "
-       << schema_type.ToString();
-    return Status::Invalid(ss.str());
+    return Status::Invalid("Column ", i,
+                           " type does not match schema: ", arr.type->ToString(),
+                           " vs ", schema_type.ToString());
   }
 }
 return Status::OK();
@@ -249,4 +244,22 @@ Status RecordBatch::Validate() const {

 RecordBatchReader::~RecordBatchReader() {}

+Status RecordBatchReader::ReadAll(std::vector<std::shared_ptr<RecordBatch>>* batches) {
+  while (true) {
+    std::shared_ptr<RecordBatch> batch;
+    RETURN_NOT_OK(ReadNext(&batch));
+    if (!batch) {
+      break;
+    }
+    batches->emplace_back(std::move(batch));
+  }
+  return Status::OK();
+}
+
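+// Editorial sketch (hypothetical `reader` variable, for illustration only):
+//   std::shared_ptr<Table> table;
+//   RETURN_NOT_OK(reader->ReadAll(&table));
+// drains the stream via ReadNext and concatenates the batches with
+// Table::FromRecordBatches, as implemented below.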
+Status RecordBatchReader::ReadAll(std::shared_ptr<Table>* table) {
+  std::vector<std::shared_ptr<RecordBatch>> batches;
+  RETURN_NOT_OK(ReadAll(&batches));
+  return Table::FromRecordBatches(schema(), batches, table);
+}
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/record_batch.h b/cpp/src/arrow/record_batch.h
index f6538f9c40578..ceb6885da621e 100644
--- a/cpp/src/arrow/record_batch.h
+++ b/cpp/src/arrow/record_batch.h
@@ -32,6 +32,7 @@ namespace arrow {

 class Array;
 struct ArrayData;
 class Status;
+class Table;

 /// \class RecordBatch
 /// \brief Collection of equal-length arrays matching a particular Schema
@@ -170,12 +171,18 @@ class ARROW_EXPORT RecordBatchReader {
   /// \return the shared schema of the record batches in the stream
   virtual std::shared_ptr<Schema> schema() const = 0;

-  /// Read the next record batch in the stream. Return null for batch when
-  /// reaching end of stream
+  /// \brief Read the next record batch in the stream. Return null for batch
+  /// when reaching end of stream
   ///
   /// \param[out] batch the next loaded batch, null at end of stream
   /// \return Status
   virtual Status ReadNext(std::shared_ptr<RecordBatch>* batch) = 0;
+
+  /// \brief Consume entire stream as a vector of record batches
+  Status ReadAll(std::vector<std::shared_ptr<RecordBatch>>* batches);
+
+  /// \brief Read all batches and concatenate as arrow::Table
+  Status ReadAll(std::shared_ptr<Table>
* table); }; } // namespace arrow diff --git a/cpp/src/arrow/sparse_tensor-test.cc b/cpp/src/arrow/sparse_tensor-test.cc new file mode 100644 index 0000000000000..0a3e98611ba7c --- /dev/null +++ b/cpp/src/arrow/sparse_tensor-test.cc @@ -0,0 +1,271 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Unit tests for DataType (and subclasses), Field, and Schema + +#include +#include +#include +#include + +#include + +#include + +#include "arrow/sparse_tensor.h" +#include "arrow/test-util.h" +#include "arrow/type.h" + +namespace arrow { + +static inline void CheckSparseIndexFormatType(SparseTensorFormat::type expected, + const SparseTensor& sparse_tensor) { + ASSERT_EQ(expected, sparse_tensor.format_id()); + ASSERT_EQ(expected, sparse_tensor.sparse_index()->format_id()); +} + +static inline void AssertCOOIndex( + const std::shared_ptr& sidx, const int64_t nth, + const std::vector& expected_values) { + int64_t n = static_cast(expected_values.size()); + for (int64_t i = 0; i < n; ++i) { + ASSERT_EQ(expected_values[i], sidx->Value({nth, i})); + } +} + +TEST(TestSparseCOOTensor, CreationEmptyTensor) { + std::vector shape = {2, 3, 4}; + SparseTensorImpl st1(int64(), shape); + + std::vector dim_names = {"foo", "bar", "baz"}; + SparseTensorImpl st2(int64(), shape, dim_names); + + ASSERT_EQ(0, st1.non_zero_length()); + ASSERT_EQ(0, st2.non_zero_length()); + + ASSERT_EQ(24, st1.size()); + ASSERT_EQ(24, st2.size()); + + ASSERT_EQ(std::vector({"foo", "bar", "baz"}), st2.dim_names()); + ASSERT_EQ("foo", st2.dim_name(0)); + ASSERT_EQ("bar", st2.dim_name(1)); + ASSERT_EQ("baz", st2.dim_name(2)); + + ASSERT_EQ(std::vector({}), st1.dim_names()); + ASSERT_EQ("", st1.dim_name(0)); + ASSERT_EQ("", st1.dim_name(1)); + ASSERT_EQ("", st1.dim_name(2)); +} + +TEST(TestSparseCOOTensor, CreationFromNumericTensor) { + std::vector shape = {2, 3, 4}; + std::vector values = {1, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0, + 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16}; + std::shared_ptr buffer = Buffer::Wrap(values); + std::vector dim_names = {"foo", "bar", "baz"}; + NumericTensor tensor1(buffer, shape); + NumericTensor tensor2(buffer, shape, {}, dim_names); + SparseTensorImpl st1(tensor1); + SparseTensorImpl st2(tensor2); + + CheckSparseIndexFormatType(SparseTensorFormat::COO, st1); + + ASSERT_EQ(12, st1.non_zero_length()); + ASSERT_TRUE(st1.is_mutable()); + + ASSERT_EQ(std::vector({"foo", "bar", "baz"}), st2.dim_names()); + ASSERT_EQ("foo", st2.dim_name(0)); + ASSERT_EQ("bar", st2.dim_name(1)); + ASSERT_EQ("baz", st2.dim_name(2)); + + ASSERT_EQ(std::vector({}), st1.dim_names()); + ASSERT_EQ("", st1.dim_name(0)); + ASSERT_EQ("", st1.dim_name(1)); + ASSERT_EQ("", st1.dim_name(2)); + + const int64_t* raw_data = reinterpret_cast(st1.raw_data()); + AssertNumericDataEqual(raw_data, {1, 2, 
+
+TEST(TestSparseCOOTensor, CreationFromTensor) {
+  std::vector<int64_t> shape = {2, 3, 4};
+  std::vector<int64_t> values = {1, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0,
+                                 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16};
+  std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
+  std::vector<std::string> dim_names = {"foo", "bar", "baz"};
+  Tensor tensor1(int64(), buffer, shape);
+  Tensor tensor2(int64(), buffer, shape, {}, dim_names);
+  SparseTensorImpl<SparseCOOIndex> st1(tensor1);
+  SparseTensorImpl<SparseCOOIndex> st2(tensor2);
+
+  ASSERT_EQ(12, st1.non_zero_length());
+  ASSERT_TRUE(st1.is_mutable());
+
+  ASSERT_EQ(std::vector<std::string>({"foo", "bar", "baz"}), st2.dim_names());
+  ASSERT_EQ("foo", st2.dim_name(0));
+  ASSERT_EQ("bar", st2.dim_name(1));
+  ASSERT_EQ("baz", st2.dim_name(2));
+
+  ASSERT_EQ(std::vector<std::string>({}), st1.dim_names());
+  ASSERT_EQ("", st1.dim_name(0));
+  ASSERT_EQ("", st1.dim_name(1));
+  ASSERT_EQ("", st1.dim_name(2));
+
+  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data());
+  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
+
+  const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st1.sparse_index());
+  std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si.indices();
+  ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
+  ASSERT_TRUE(sidx->is_column_major());
+
+  AssertCOOIndex(sidx, 0, {0, 0, 0});
+  AssertCOOIndex(sidx, 1, {0, 0, 2});
+  AssertCOOIndex(sidx, 2, {0, 1, 1});
+  AssertCOOIndex(sidx, 10, {1, 2, 1});
+  AssertCOOIndex(sidx, 11, {1, 2, 3});
+}
+
+TEST(TestSparseCOOTensor, CreationFromNonContiguousTensor) {
+  std::vector<int64_t> shape = {2, 3, 4};
+  std::vector<int64_t> values = {1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 4, 0,
+                                 5, 0, 0, 0, 6, 0, 0, 0, 0, 0, 11, 0, 0, 0, 12, 0,
+                                 13, 0, 0, 0, 14, 0, 0, 0, 0, 0, 15, 0, 0, 0, 16, 0};
+  std::vector<int64_t> strides = {192, 64, 16};
+  std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
+  Tensor tensor(int64(), buffer, shape, strides);
+  SparseTensorImpl<SparseCOOIndex> st(tensor);
+
+  ASSERT_EQ(12, st.non_zero_length());
+  ASSERT_TRUE(st.is_mutable());
+
+  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st.raw_data());
+  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
+
+  const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st.sparse_index());
+  std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si.indices();
+  ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
+  ASSERT_TRUE(sidx->is_column_major());
+
+  AssertCOOIndex(sidx, 0, {0, 0, 0});
+  AssertCOOIndex(sidx, 1, {0, 0, 2});
+  AssertCOOIndex(sidx, 2, {0, 1, 1});
+  AssertCOOIndex(sidx, 10, {1, 2, 1});
+  AssertCOOIndex(sidx, 11, {1, 2, 3});
+}
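The COO cases above all funnel through the same conversion path. A minimal standalone sketch of that path, using only the constructors this patch adds (the function name below is illustrative, not from the test file):

    #include <cstdint>
    #include <memory>
    #include <vector>

    #include "arrow/buffer.h"
    #include "arrow/sparse_tensor.h"
    #include "arrow/tensor.h"

    void ExampleDenseToCOO() {
      // Same 2x3x4 data as the tests above: twelve non-zero entries.
      std::vector<int64_t> values = {1, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0,
                                     0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16};
      std::shared_ptr<arrow::Buffer> buffer = arrow::Buffer::Wrap(values);
      arrow::Tensor dense(arrow::int64(), buffer, {2, 3, 4});
      // Converting constructor: scans the dense tensor and materializes a
      // column-major [non_zero_length, ndim] coordinates tensor.
      arrow::SparseTensorCOO sparse(dense);
      // sparse.non_zero_length() is 12; sparse.sparse_index() holds the coords.
    }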
+
+TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) {
+  std::vector<int64_t> shape = {6, 4};
+  std::vector<int64_t> values = {1, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0,
+                                 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16};
+  std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
+  std::vector<std::string> dim_names = {"foo", "bar", "baz"};
+  NumericTensor<Int64Type> tensor1(buffer, shape);
+  NumericTensor<Int64Type> tensor2(buffer, shape, {}, dim_names);
+
+  SparseTensorImpl<SparseCSRIndex> st1(tensor1);
+  SparseTensorImpl<SparseCSRIndex> st2(tensor2);
+
+  CheckSparseIndexFormatType(SparseTensorFormat::CSR, st1);
+
+  ASSERT_EQ(12, st1.non_zero_length());
+  ASSERT_TRUE(st1.is_mutable());
+
+  ASSERT_EQ(std::vector<std::string>({"foo", "bar", "baz"}), st2.dim_names());
+  ASSERT_EQ("foo", st2.dim_name(0));
+  ASSERT_EQ("bar", st2.dim_name(1));
+  ASSERT_EQ("baz", st2.dim_name(2));
+
+  ASSERT_EQ(std::vector<std::string>({}), st1.dim_names());
+  ASSERT_EQ("", st1.dim_name(0));
+  ASSERT_EQ("", st1.dim_name(1));
+  ASSERT_EQ("", st1.dim_name(2));
+
+  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data());
+  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
+
+  const auto& si = internal::checked_cast<const SparseCSRIndex&>(*st1.sparse_index());
+  ASSERT_EQ(std::string("SparseCSRIndex"), si.ToString());
+  ASSERT_EQ(1, si.indptr()->ndim());
+  ASSERT_EQ(1, si.indices()->ndim());
+
+  const int64_t* indptr_begin = reinterpret_cast<const int64_t*>(si.indptr()->raw_data());
+  std::vector<int64_t> indptr_values(indptr_begin,
+                                     indptr_begin + si.indptr()->shape()[0]);
+
+  ASSERT_EQ(7, indptr_values.size());
+  ASSERT_EQ(std::vector<int64_t>({0, 2, 4, 6, 8, 10, 12}), indptr_values);
+
+  const int64_t* indices_begin =
+      reinterpret_cast<const int64_t*>(si.indices()->raw_data());
+  std::vector<int64_t> indices_values(indices_begin,
+                                      indices_begin + si.indices()->shape()[0]);
+
+  ASSERT_EQ(12, indices_values.size());
+  ASSERT_EQ(std::vector<int64_t>({0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}), indices_values);
+}
+
+TEST(TestSparseCSRMatrix, CreationFromNonContiguousTensor) {
+  std::vector<int64_t> shape = {6, 4};
+  std::vector<int64_t> values = {1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 4, 0,
+                                 5, 0, 0, 0, 6, 0, 0, 0, 0, 0, 11, 0, 0, 0, 12, 0,
+                                 13, 0, 0, 0, 14, 0, 0, 0, 0, 0, 15, 0, 0, 0, 16, 0};
+  std::vector<int64_t> strides = {64, 16};
+  std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
+  Tensor tensor(int64(), buffer, shape, strides);
+  SparseTensorImpl<SparseCSRIndex> st(tensor);
+
+  ASSERT_EQ(12, st.non_zero_length());
+  ASSERT_TRUE(st.is_mutable());
+
+  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st.raw_data());
+  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
+
+  const auto& si = internal::checked_cast<const SparseCSRIndex&>(*st.sparse_index());
+  ASSERT_EQ(1, si.indptr()->ndim());
+  ASSERT_EQ(1, si.indices()->ndim());
+
+  const int64_t* indptr_begin = reinterpret_cast<const int64_t*>(si.indptr()->raw_data());
+  std::vector<int64_t> indptr_values(indptr_begin,
+                                     indptr_begin + si.indptr()->shape()[0]);
+
+  ASSERT_EQ(7, indptr_values.size());
+  ASSERT_EQ(std::vector<int64_t>({0, 2, 4, 6, 8, 10, 12}), indptr_values);
+
+  const int64_t* indices_begin =
+      reinterpret_cast<const int64_t*>(si.indices()->raw_data());
+  std::vector<int64_t> indices_values(indices_begin,
+                                      indices_begin + si.indices()->shape()[0]);
+
+  ASSERT_EQ(12, indices_values.size());
+  ASSERT_EQ(std::vector<int64_t>({0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}), indices_values);
+}
+
+} // namespace arrow

diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
new file mode 100644
index 0000000000000..a55f51a56733f
--- /dev/null
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -0,0 +1,452 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/sparse_tensor.h" + +#include +#include +#include + +#include "arrow/compare.h" +#include "arrow/util/logging.h" + +namespace arrow { + +namespace { + +// ---------------------------------------------------------------------- +// SparseTensorConverter + +template +class SparseTensorConverter { + public: + explicit SparseTensorConverter(const NumericTensor&) {} + + Status Convert() { return Status::Invalid("Unsupported sparse index"); } +}; + +// ---------------------------------------------------------------------- +// SparseTensorConverter for SparseCOOIndex + +template +struct SparseTensorConverterBase { + using NumericTensorType = NumericTensor; + using value_type = typename NumericTensorType::value_type; + + explicit SparseTensorConverterBase(const NumericTensorType& tensor) : tensor_(tensor) {} + + bool TensorIsTriviallyIterable() const { + return tensor_.ndim() <= 1 || tensor_.is_contiguous(); + } + + size_t CountNonZero() const { + if (tensor_.size() == 0) { + return 0; + } + + if (TensorIsTriviallyIterable()) { + const value_type* data = reinterpret_cast(tensor_.raw_data()); + return std::count_if(data, data + tensor_.size(), + [](value_type x) { return x != 0; }); + } + + const std::vector& shape = tensor_.shape(); + const int64_t ndim = tensor_.ndim(); + + size_t count = 0; + std::vector coord(ndim, 0); + for (int64_t n = tensor_.size(); n > 0; n--) { + if (tensor_.Value(coord) != 0) { + ++count; + } + + // increment index + ++coord[ndim - 1]; + if (n > 1 && coord[ndim - 1] == shape[ndim - 1]) { + int64_t d = ndim - 1; + while (d > 0 && coord[d] == shape[d]) { + coord[d] = 0; + ++coord[d - 1]; + --d; + } + } + } + return count; + } + + const NumericTensorType& tensor_; +}; + +template +class SparseTensorConverter + : private SparseTensorConverterBase { + public: + using BaseClass = SparseTensorConverterBase; + using NumericTensorType = typename BaseClass::NumericTensorType; + using value_type = typename BaseClass::value_type; + + explicit SparseTensorConverter(const NumericTensorType& tensor) : BaseClass(tensor) {} + + Status Convert() { + const int64_t ndim = tensor_.ndim(); + const int64_t nonzero_count = static_cast(CountNonZero()); + + std::shared_ptr indices_buffer; + RETURN_NOT_OK( + AllocateBuffer(sizeof(int64_t) * ndim * nonzero_count, &indices_buffer)); + int64_t* indices = reinterpret_cast(indices_buffer->mutable_data()); + + std::shared_ptr values_buffer; + RETURN_NOT_OK(AllocateBuffer(sizeof(value_type) * nonzero_count, &values_buffer)); + value_type* values = reinterpret_cast(values_buffer->mutable_data()); + + if (ndim <= 1) { + const value_type* data = reinterpret_cast(tensor_.raw_data()); + const int64_t count = ndim == 0 ? 
1 : tensor_.shape()[0]; + for (int64_t i = 0; i < count; ++i, ++data) { + if (*data != 0) { + *indices++ = i; + *values++ = *data; + } + } + } else { + const std::vector& shape = tensor_.shape(); + std::vector coord(ndim, 0); + + for (int64_t n = tensor_.size(); n > 0; n--) { + const value_type x = tensor_.Value(coord); + if (tensor_.Value(coord) != 0) { + *values++ = x; + + int64_t* indp = indices; + for (int64_t i = 0; i < ndim; ++i) { + *indp = coord[i]; + indp += nonzero_count; + } + indices++; + } + + // increment index + ++coord[ndim - 1]; + if (n > 1 && coord[ndim - 1] == shape[ndim - 1]) { + int64_t d = ndim - 1; + while (d > 0 && coord[d] == shape[d]) { + coord[d] = 0; + ++coord[d - 1]; + --d; + } + } + } + } + + // make results + const std::vector indices_shape = {nonzero_count, ndim}; + const int64_t indices_elsize = sizeof(int64_t); + const std::vector indices_strides = {indices_elsize, + indices_elsize * nonzero_count}; + sparse_index = + std::make_shared(std::make_shared( + indices_buffer, indices_shape, indices_strides)); + data = values_buffer; + + return Status::OK(); + } + + std::shared_ptr sparse_index; + std::shared_ptr data; + + private: + using SparseTensorConverterBase::tensor_; + using SparseTensorConverterBase::CountNonZero; +}; + +template +void MakeSparseTensorFromTensor(const Tensor& tensor, + std::shared_ptr* sparse_index, + std::shared_ptr* data) { + NumericTensor numeric_tensor(tensor.data(), tensor.shape(), tensor.strides()); + SparseTensorConverter converter(numeric_tensor); + DCHECK_OK(converter.Convert()); + *sparse_index = converter.sparse_index; + *data = converter.data; +} + +// ---------------------------------------------------------------------- +// SparseTensorConverter for SparseCSRIndex + +template +class SparseTensorConverter + : private SparseTensorConverterBase { + public: + using BaseClass = SparseTensorConverterBase; + using NumericTensorType = typename BaseClass::NumericTensorType; + using value_type = typename BaseClass::value_type; + + explicit SparseTensorConverter(const NumericTensorType& tensor) : BaseClass(tensor) {} + + Status Convert() { + const int64_t ndim = tensor_.ndim(); + if (ndim > 2) { + return Status::Invalid("Invalid tensor dimension"); + } + + const int64_t nr = tensor_.shape()[0]; + const int64_t nc = tensor_.shape()[1]; + const int64_t nonzero_count = static_cast(CountNonZero()); + + std::shared_ptr indptr_buffer; + std::shared_ptr indices_buffer; + + std::shared_ptr values_buffer; + RETURN_NOT_OK(AllocateBuffer(sizeof(value_type) * nonzero_count, &values_buffer)); + value_type* values = reinterpret_cast(values_buffer->mutable_data()); + + if (ndim <= 1) { + return Status::NotImplemented("TODO for ndim <= 1"); + } else { + RETURN_NOT_OK(AllocateBuffer(sizeof(int64_t) * (nr + 1), &indptr_buffer)); + int64_t* indptr = reinterpret_cast(indptr_buffer->mutable_data()); + + RETURN_NOT_OK(AllocateBuffer(sizeof(int64_t) * nonzero_count, &indices_buffer)); + int64_t* indices = reinterpret_cast(indices_buffer->mutable_data()); + + int64_t k = 0; + *indptr++ = 0; + for (int64_t i = 0; i < nr; ++i) { + for (int64_t j = 0; j < nc; ++j) { + const value_type x = tensor_.Value({i, j}); + if (x != 0) { + *values++ = x; + *indices++ = j; + k++; + } + } + *indptr++ = k; + } + } + + std::vector indptr_shape({nr + 1}); + std::shared_ptr indptr_tensor = + std::make_shared(indptr_buffer, indptr_shape); + + std::vector indices_shape({nonzero_count}); + std::shared_ptr indices_tensor = + std::make_shared(indices_buffer, indices_shape); + + 
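// (Editorial aside, illustrative values only, not part of the patch: for the
// 6x4 matrix exercised in sparse_tensor-test.cc, the row loop above produces
// indptr = {0, 2, 4, 6, 8, 10, 12} and indices = {0, 2, 1, 3, 0, 2, 1, 3, 0,
// 2, 1, 3}. For each row i, indptr[i + 1] - indptr[i] gives the number of
// non-zeros in that row, and the corresponding slice of indices gives their
// column numbers. The two one-dimensional tensors built above are exactly
// what the SparseCSRIndex constructed next wraps.)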
sparse_index = std::make_shared(indptr_tensor, indices_tensor); + data = values_buffer; + + return Status::OK(); + } + + std::shared_ptr sparse_index; + std::shared_ptr data; + + private: + using BaseClass::tensor_; + using SparseTensorConverterBase::CountNonZero; +}; + +// ---------------------------------------------------------------------- +// Instantiate templates + +#define INSTANTIATE_SPARSE_TENSOR_CONVERTER(IndexType) \ + template class SparseTensorConverter; \ + template class SparseTensorConverter; \ + template class SparseTensorConverter; \ + template class SparseTensorConverter; \ + template class SparseTensorConverter; \ + template class SparseTensorConverter; \ + template class SparseTensorConverter; \ + template class SparseTensorConverter; \ + template class SparseTensorConverter; \ + template class SparseTensorConverter; \ + template class SparseTensorConverter + +INSTANTIATE_SPARSE_TENSOR_CONVERTER(SparseCOOIndex); +INSTANTIATE_SPARSE_TENSOR_CONVERTER(SparseCSRIndex); + +} // namespace + +// ---------------------------------------------------------------------- +// SparseCOOIndex + +// Constructor with a column-major NumericTensor +SparseCOOIndex::SparseCOOIndex(const std::shared_ptr& coords) + : SparseIndexBase(coords->shape()[0]), coords_(coords) { + DCHECK(coords_->is_column_major()); +} + +std::string SparseCOOIndex::ToString() const { return std::string("SparseCOOIndex"); } + +// ---------------------------------------------------------------------- +// SparseCSRIndex + +// Constructor with two index vectors +SparseCSRIndex::SparseCSRIndex(const std::shared_ptr& indptr, + const std::shared_ptr& indices) + : SparseIndexBase(indices->shape()[0]), indptr_(indptr), indices_(indices) { + DCHECK_EQ(1, indptr_->ndim()); + DCHECK_EQ(1, indices_->ndim()); +} + +std::string SparseCSRIndex::ToString() const { return std::string("SparseCSRIndex"); } + +// ---------------------------------------------------------------------- +// SparseTensor + +// Constructor with all attributes +SparseTensor::SparseTensor(const std::shared_ptr& type, + const std::shared_ptr& data, + const std::vector& shape, + const std::shared_ptr& sparse_index, + const std::vector& dim_names) + : type_(type), + data_(data), + shape_(shape), + sparse_index_(sparse_index), + dim_names_(dim_names) { + DCHECK(is_tensor_supported(type->id())); +} + +const std::string& SparseTensor::dim_name(int i) const { + static const std::string kEmpty = ""; + if (dim_names_.size() == 0) { + return kEmpty; + } else { + DCHECK_LT(i, static_cast(dim_names_.size())); + return dim_names_[i]; + } +} + +int64_t SparseTensor::size() const { + return std::accumulate(shape_.begin(), shape_.end(), 1LL, std::multiplies()); +} + +bool SparseTensor::Equals(const SparseTensor& other) const { + return SparseTensorEquals(*this, other); +} + +// ---------------------------------------------------------------------- +// SparseTensorImpl + +// Constructor with a dense tensor +template +SparseTensorImpl::SparseTensorImpl( + const std::shared_ptr& type, const std::vector& shape, + const std::vector& dim_names) + : SparseTensorImpl(nullptr, type, nullptr, shape, dim_names) {} + +// Constructor with a dense tensor +template +template +SparseTensorImpl::SparseTensorImpl(const NumericTensor& tensor) + : SparseTensorImpl(nullptr, tensor.type(), nullptr, tensor.shape(), + tensor.dim_names_) { + SparseTensorConverter converter(tensor); + DCHECK_OK(converter.Convert()); + sparse_index_ = converter.sparse_index; + data_ = converter.data; +} + +// 
Constructor with a dense tensor +template +SparseTensorImpl::SparseTensorImpl(const Tensor& tensor) + : SparseTensorImpl(nullptr, tensor.type(), nullptr, tensor.shape(), + tensor.dim_names_) { + switch (tensor.type()->id()) { + case Type::UINT8: + MakeSparseTensorFromTensor(tensor, &sparse_index_, + &data_); + return; + case Type::INT8: + MakeSparseTensorFromTensor(tensor, &sparse_index_, + &data_); + return; + case Type::UINT16: + MakeSparseTensorFromTensor(tensor, &sparse_index_, + &data_); + return; + case Type::INT16: + MakeSparseTensorFromTensor(tensor, &sparse_index_, + &data_); + return; + case Type::UINT32: + MakeSparseTensorFromTensor(tensor, &sparse_index_, + &data_); + return; + case Type::INT32: + MakeSparseTensorFromTensor(tensor, &sparse_index_, + &data_); + return; + case Type::UINT64: + MakeSparseTensorFromTensor(tensor, &sparse_index_, + &data_); + return; + case Type::INT64: + MakeSparseTensorFromTensor(tensor, &sparse_index_, + &data_); + return; + case Type::HALF_FLOAT: + MakeSparseTensorFromTensor(tensor, &sparse_index_, + &data_); + return; + case Type::FLOAT: + MakeSparseTensorFromTensor(tensor, &sparse_index_, + &data_); + return; + case Type::DOUBLE: + MakeSparseTensorFromTensor(tensor, &sparse_index_, + &data_); + return; + default: + break; + } +} + +// ---------------------------------------------------------------------- +// Instantiate templates + +#define INSTANTIATE_SPARSE_TENSOR(IndexType) \ + template class ARROW_TEMPLATE_EXPORT SparseTensorImpl; \ + template ARROW_EXPORT SparseTensorImpl::SparseTensorImpl( \ + const NumericTensor&); \ + template ARROW_EXPORT SparseTensorImpl::SparseTensorImpl( \ + const NumericTensor&); \ + template ARROW_EXPORT SparseTensorImpl::SparseTensorImpl( \ + const NumericTensor&); \ + template ARROW_EXPORT SparseTensorImpl::SparseTensorImpl( \ + const NumericTensor&); \ + template ARROW_EXPORT SparseTensorImpl::SparseTensorImpl( \ + const NumericTensor&); \ + template ARROW_EXPORT SparseTensorImpl::SparseTensorImpl( \ + const NumericTensor&); \ + template ARROW_EXPORT SparseTensorImpl::SparseTensorImpl( \ + const NumericTensor&); \ + template ARROW_EXPORT SparseTensorImpl::SparseTensorImpl( \ + const NumericTensor&); \ + template ARROW_EXPORT SparseTensorImpl::SparseTensorImpl( \ + const NumericTensor&); \ + template ARROW_EXPORT SparseTensorImpl::SparseTensorImpl( \ + const NumericTensor&); \ + template ARROW_EXPORT SparseTensorImpl::SparseTensorImpl( \ + const NumericTensor&) + +INSTANTIATE_SPARSE_TENSOR(SparseCOOIndex); +INSTANTIATE_SPARSE_TENSOR(SparseCSRIndex); + +} // namespace arrow diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h new file mode 100644 index 0000000000000..ded3a6d9bf8e3 --- /dev/null +++ b/cpp/src/arrow/sparse_tensor.h @@ -0,0 +1,212 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef ARROW_SPARSE_TENSOR_H +#define ARROW_SPARSE_TENSOR_H + +#include +#include +#include + +#include "arrow/tensor.h" + +namespace arrow { + +// ---------------------------------------------------------------------- +// SparseIndex class + +/// \brief EXPERIMENTAL: Sparse tensor format enumeration +struct SparseTensorFormat { + enum type { COO, CSR }; +}; + +/// \brief EXPERIMENTAL: The base class for representing index of non-zero +/// values in sparse tensor +class ARROW_EXPORT SparseIndex { + public: + explicit SparseIndex(SparseTensorFormat::type format_id, int64_t non_zero_length) + : format_id_(format_id), non_zero_length_(non_zero_length) {} + + virtual ~SparseIndex() = default; + + SparseTensorFormat::type format_id() const { return format_id_; } + int64_t non_zero_length() const { return non_zero_length_; } + + virtual std::string ToString() const = 0; + + protected: + SparseTensorFormat::type format_id_; + int64_t non_zero_length_; +}; + +template +class SparseIndexBase : public SparseIndex { + public: + explicit SparseIndexBase(int64_t non_zero_length) + : SparseIndex(SparseIndexType::format_id, non_zero_length) {} +}; + +// ---------------------------------------------------------------------- +// SparseCOOIndex class + +/// \brief EXPERIMENTAL: The index data for COO sparse tensor +class ARROW_EXPORT SparseCOOIndex : public SparseIndexBase { + public: + using CoordsTensor = NumericTensor; + + static constexpr SparseTensorFormat::type format_id = SparseTensorFormat::COO; + + // Constructor with a column-major NumericTensor + explicit SparseCOOIndex(const std::shared_ptr& coords); + + const std::shared_ptr& indices() const { return coords_; } + + std::string ToString() const override; + + bool Equals(const SparseCOOIndex& other) const { + return indices()->Equals(*other.indices()); + } + + protected: + std::shared_ptr coords_; +}; + +// ---------------------------------------------------------------------- +// SparseCSRIndex class + +/// \brief EXPERIMENTAL: The index data for CSR sparse matrix +class ARROW_EXPORT SparseCSRIndex : public SparseIndexBase { + public: + using IndexTensor = NumericTensor; + + static constexpr SparseTensorFormat::type format_id = SparseTensorFormat::CSR; + + // Constructor with two index vectors + explicit SparseCSRIndex(const std::shared_ptr& indptr, + const std::shared_ptr& indices); + + const std::shared_ptr& indptr() const { return indptr_; } + const std::shared_ptr& indices() const { return indices_; } + + std::string ToString() const override; + + bool Equals(const SparseCSRIndex& other) const { + return indptr()->Equals(*other.indptr()) && indices()->Equals(*other.indices()); + } + + protected: + std::shared_ptr indptr_; + std::shared_ptr indices_; +}; + +// ---------------------------------------------------------------------- +// SparseTensor class + +/// \brief EXPERIMENTAL: The base class of sparse tensor container +class ARROW_EXPORT SparseTensor { + public: + virtual ~SparseTensor() = default; + + SparseTensorFormat::type format_id() const { return sparse_index_->format_id(); } + + std::shared_ptr type() const { return type_; } + std::shared_ptr data() const { return data_; } + + const uint8_t* raw_data() const { return data_->data(); } + uint8_t* raw_mutable_data() const { return data_->mutable_data(); } + + const std::vector& shape() const { return shape_; } + + const std::shared_ptr& sparse_index() const { return 
sparse_index_; } + + int ndim() const { return static_cast(shape_.size()); } + + const std::vector& dim_names() const { return dim_names_; } + const std::string& dim_name(int i) const; + + /// Total number of value cells in the sparse tensor + int64_t size() const; + + /// Return true if the underlying data buffer is mutable + bool is_mutable() const { return data_->is_mutable(); } + + /// Total number of non-zero cells in the sparse tensor + int64_t non_zero_length() const { + return sparse_index_ ? sparse_index_->non_zero_length() : 0; + } + + bool Equals(const SparseTensor& other) const; + + protected: + // Constructor with all attributes + SparseTensor(const std::shared_ptr& type, const std::shared_ptr& data, + const std::vector& shape, + const std::shared_ptr& sparse_index, + const std::vector& dim_names); + + std::shared_ptr type_; + std::shared_ptr data_; + std::vector shape_; + std::shared_ptr sparse_index_; + + /// These names are optional + std::vector dim_names_; +}; + +// ---------------------------------------------------------------------- +// SparseTensorImpl class + +/// \brief EXPERIMENTAL: Concrete sparse tensor implementation classes with sparse index +/// type +template +class ARROW_EXPORT SparseTensorImpl : public SparseTensor { + public: + virtual ~SparseTensorImpl() = default; + + // Constructor with all attributes + SparseTensorImpl(const std::shared_ptr& sparse_index, + const std::shared_ptr& type, + const std::shared_ptr& data, const std::vector& shape, + const std::vector& dim_names) + : SparseTensor(type, data, shape, sparse_index, dim_names) {} + + // Constructor for empty sparse tensor + SparseTensorImpl(const std::shared_ptr& type, + const std::vector& shape, + const std::vector& dim_names = {}); + + // Constructor with a dense numeric tensor + template + explicit SparseTensorImpl(const NumericTensor& tensor); + + // Constructor with a dense tensor + explicit SparseTensorImpl(const Tensor& tensor); + + private: + ARROW_DISALLOW_COPY_AND_ASSIGN(SparseTensorImpl); +}; + +/// \brief EXPERIMENTAL: Type alias for COO sparse tensor +using SparseTensorCOO = SparseTensorImpl; + +/// \brief EXPERIMENTAL: Type alias for CSR sparse matrix +using SparseTensorCSR = SparseTensorImpl; +using SparseMatrixCSR = SparseTensorImpl; + +} // namespace arrow + +#endif // ARROW_SPARSE_TENSOR_H diff --git a/cpp/src/arrow/status.cc b/cpp/src/arrow/status.cc index 8be8b36d13bd8..db7f087149017 100644 --- a/cpp/src/arrow/status.cc +++ b/cpp/src/arrow/status.cc @@ -13,6 +13,7 @@ #include "arrow/status.h" #include +#include namespace arrow { diff --git a/cpp/src/arrow/status.h b/cpp/src/arrow/status.h index 7280133a65fb9..96b018b650dfa 100644 --- a/cpp/src/arrow/status.h +++ b/cpp/src/arrow/status.h @@ -25,32 +25,43 @@ #endif #include "arrow/util/macros.h" +#include "arrow/util/string_builder.h" #include "arrow/util/visibility.h" #ifdef ARROW_EXTRA_ERROR_CONTEXT -#define ARROW_RETURN_NOT_OK(s) \ - do { \ - ::arrow::Status _s = (s); \ - if (ARROW_PREDICT_FALSE(!_s.ok())) { \ - std::stringstream ss; \ - ss << __FILE__ << ":" << __LINE__ << " code: " << #s << "\n" << _s.message(); \ - return Status(_s.code(), ss.str()); \ - } \ +/// \brief Return with given status if condition is met. 
+#define ARROW_RETURN_IF_(condition, status, expr) \ + do { \ + if (ARROW_PREDICT_FALSE(condition)) { \ + ::arrow::Status _s = (status); \ + std::stringstream ss; \ + ss << _s.message() << "\n" << __FILE__ << ":" << __LINE__ << " code: " << expr; \ + return ::arrow::Status(_s.code(), ss.str()); \ + } \ } while (0) #else -#define ARROW_RETURN_NOT_OK(s) \ - do { \ - ::arrow::Status _s = (s); \ - if (ARROW_PREDICT_FALSE(!_s.ok())) { \ - return _s; \ - } \ - } while (false) +#define ARROW_RETURN_IF_(condition, status, _) \ + do { \ + if (ARROW_PREDICT_FALSE(condition)) { \ + return (status); \ + } \ + } while (0) #endif // ARROW_EXTRA_ERROR_CONTEXT +#define ARROW_RETURN_IF(condition, status) \ + ARROW_RETURN_IF_(condition, status, ARROW_STRINGIFY(status)) + +/// \brief Propagate any non-successful Status to the caller +#define ARROW_RETURN_NOT_OK(status) \ + do { \ + ::arrow::Status __s = (status); \ + ARROW_RETURN_IF_(!__s.ok(), __s, ARROW_STRINGIFY(status)); \ + } while (false) + #define RETURN_NOT_OK_ELSE(s, else_) \ do { \ ::arrow::Status _s = (s); \ @@ -60,17 +71,6 @@ } \ } while (false) -#define ARROW_RETURN_FAILURE_IF_FALSE(condition, status) \ - do { \ - if (!(condition)) { \ - Status _status = (status); \ - std::stringstream ss; \ - ss << __FILE__ << ":" << __LINE__ << " code: " << _status.CodeAsString() << " \n " \ - << _status.message(); \ - return Status(_status.code(), ss.str()); \ - } \ - } while (0) - // This is an internal-use macro and should not be used in public headers. #ifndef RETURN_NOT_OK #define RETURN_NOT_OK(s) ARROW_RETURN_NOT_OK(s) @@ -107,10 +107,18 @@ enum class StatusCode : char { class ARROW_MUST_USE_RESULT ARROW_EXPORT Status; #endif +/// \brief Status outcome object (success or error) +/// +/// The Status object is an object holding the outcome of an operation. +/// The outcome is represented as a StatusCode, either success +/// (StatusCode::OK) or an error (any other of the StatusCode enumeration values). +/// +/// Additionally, if an error occurred, a specific error message is generally +/// attached. class ARROW_EXPORT Status { public: // Create a success status. - Status() noexcept : state_(NULL) {} + Status() noexcept : state_(NULLPTR) {} ~Status() noexcept { // ARROW-2400: On certain compilers, splitting off the slow path improves // performance significantly. @@ -122,123 +130,174 @@ class ARROW_EXPORT Status { Status(StatusCode code, const std::string& msg); // Copy the specified status. - Status(const Status& s); - Status& operator=(const Status& s); + inline Status(const Status& s); + inline Status& operator=(const Status& s); // Move the specified status. inline Status(Status&& s) noexcept; - Status& operator=(Status&& s) noexcept; + inline Status& operator=(Status&& s) noexcept; // AND the statuses. - Status operator&(const Status& s) const noexcept; - Status operator&(Status&& s) const noexcept; - Status& operator&=(const Status& s) noexcept; - Status& operator&=(Status&& s) noexcept; + inline Status operator&(const Status& s) const noexcept; + inline Status operator&(Status&& s) const noexcept; + inline Status& operator&=(const Status& s) noexcept; + inline Status& operator&=(Status&& s) noexcept; - // Return a success status. + /// Return a success status static Status OK() { return Status(); } - // Return a success status with extra info - static Status OK(const std::string& msg) { return Status(StatusCode::OK, msg); } + /// Return a success status with a specific message + template + static Status OK(Args&&... 
args) { + return Status(StatusCode::OK, util::StringBuilder(std::forward(args)...)); + } - // Return error status of an appropriate type. - static Status OutOfMemory(const std::string& msg) { - return Status(StatusCode::OutOfMemory, msg); + /// Return an error status for out-of-memory conditions + template + static Status OutOfMemory(Args&&... args) { + return Status(StatusCode::OutOfMemory, + util::StringBuilder(std::forward(args)...)); } - static Status KeyError(const std::string& msg) { - return Status(StatusCode::KeyError, msg); + /// Return an error status for failed key lookups (e.g. column name in a table) + template + static Status KeyError(Args&&... args) { + return Status(StatusCode::KeyError, util::StringBuilder(std::forward(args)...)); } - static Status TypeError(const std::string& msg) { - return Status(StatusCode::TypeError, msg); + /// Return an error status for type errors (such as mismatching data types) + template + static Status TypeError(Args&&... args) { + return Status(StatusCode::TypeError, + util::StringBuilder(std::forward(args)...)); } - static Status UnknownError(const std::string& msg) { - return Status(StatusCode::UnknownError, msg); + /// Return an error status for unknown errors + template + static Status UnknownError(Args&&... args) { + return Status(StatusCode::UnknownError, + util::StringBuilder(std::forward(args)...)); } - static Status NotImplemented(const std::string& msg) { - return Status(StatusCode::NotImplemented, msg); + /// Return an error status when an operation or a combination of operation and + /// data types is unimplemented + template + static Status NotImplemented(Args&&... args) { + return Status(StatusCode::NotImplemented, + util::StringBuilder(std::forward(args)...)); } - static Status Invalid(const std::string& msg) { - return Status(StatusCode::Invalid, msg); + /// Return an error status for invalid data (for example a string that fails parsing) + template + static Status Invalid(Args&&... args) { + return Status(StatusCode::Invalid, util::StringBuilder(std::forward(args)...)); } - static Status CapacityError(const std::string& msg) { - return Status(StatusCode::CapacityError, msg); + /// Return an error status when a container's capacity would exceed its limits + template + static Status CapacityError(Args&&... args) { + return Status(StatusCode::CapacityError, + util::StringBuilder(std::forward(args)...)); } - static Status IOError(const std::string& msg) { - return Status(StatusCode::IOError, msg); + /// Return an error status when some IO-related operation failed + template + static Status IOError(Args&&... args) { + return Status(StatusCode::IOError, util::StringBuilder(std::forward(args)...)); } - static Status SerializationError(const std::string& msg) { - return Status(StatusCode::SerializationError, msg); + /// Return an error status when some (de)serialization operation failed + template + static Status SerializationError(Args&&... args) { + return Status(StatusCode::SerializationError, + util::StringBuilder(std::forward(args)...)); } - static Status RError(const std::string& msg) { return Status(StatusCode::RError, msg); } + template + static Status RError(Args&&... args) { + return Status(StatusCode::RError, util::StringBuilder(std::forward(args)...)); + } - static Status PlasmaObjectExists(const std::string& msg) { - return Status(StatusCode::PlasmaObjectExists, msg); + template + static Status PlasmaObjectExists(Args&&... 
args) { + return Status(StatusCode::PlasmaObjectExists, + util::StringBuilder(std::forward(args)...)); } - static Status PlasmaObjectNonexistent(const std::string& msg) { - return Status(StatusCode::PlasmaObjectNonexistent, msg); + template + static Status PlasmaObjectNonexistent(Args&&... args) { + return Status(StatusCode::PlasmaObjectNonexistent, + util::StringBuilder(std::forward(args)...)); } - static Status PlasmaObjectAlreadySealed(const std::string& msg) { - return Status(StatusCode::PlasmaObjectAlreadySealed, msg); + template + static Status PlasmaObjectAlreadySealed(Args&&... args) { + return Status(StatusCode::PlasmaObjectAlreadySealed, + util::StringBuilder(std::forward(args)...)); } - static Status PlasmaStoreFull(const std::string& msg) { - return Status(StatusCode::PlasmaStoreFull, msg); + template + static Status PlasmaStoreFull(Args&&... args) { + return Status(StatusCode::PlasmaStoreFull, + util::StringBuilder(std::forward(args)...)); } static Status StillExecuting() { return Status(StatusCode::StillExecuting, ""); } - // Return error status of an appropriate type. - static Status CodeGenError(const std::string& msg) { - return Status(StatusCode::CodeGenError, msg); + template + static Status CodeGenError(Args&&... args) { + return Status(StatusCode::CodeGenError, + util::StringBuilder(std::forward(args)...)); } - static Status ExpressionValidationError(const std::string& msg) { - return Status(StatusCode::ExpressionValidationError, msg); + template + static Status ExpressionValidationError(Args&&... args) { + return Status(StatusCode::ExpressionValidationError, + util::StringBuilder(std::forward(args)...)); } - static Status ExecutionError(const std::string& msg) { - return Status(StatusCode::ExecutionError, msg); + template + static Status ExecutionError(Args&&... args) { + return Status(StatusCode::ExecutionError, + util::StringBuilder(std::forward(args)...)); } - // Returns true iff the status indicates success. - bool ok() const { return (state_ == NULL); } + /// Return true iff the status indicates success. + bool ok() const { return (state_ == NULLPTR); } + /// Return true iff the status indicates an out-of-memory error. bool IsOutOfMemory() const { return code() == StatusCode::OutOfMemory; } + /// Return true iff the status indicates a key lookup error. bool IsKeyError() const { return code() == StatusCode::KeyError; } + /// Return true iff the status indicates invalid data. bool IsInvalid() const { return code() == StatusCode::Invalid; } + /// Return true iff the status indicates an IO-related failure. bool IsIOError() const { return code() == StatusCode::IOError; } + /// Return true iff the status indicates a container reaching capacity limits. bool IsCapacityError() const { return code() == StatusCode::CapacityError; } + /// Return true iff the status indicates a type error. bool IsTypeError() const { return code() == StatusCode::TypeError; } + /// Return true iff the status indicates an unknown error. bool IsUnknownError() const { return code() == StatusCode::UnknownError; } + /// Return true iff the status indicates an unimplemented operation. bool IsNotImplemented() const { return code() == StatusCode::NotImplemented; } - // An object could not be serialized or deserialized. + /// Return true iff the status indicates a (de)serialization failure bool IsSerializationError() const { return code() == StatusCode::SerializationError; } - // An error from R + /// Return true iff the status indicates a R-originated error. 
bool IsRError() const { return code() == StatusCode::RError; } - // An error is propagated from a nested Python function. + /// Return true iff the status indicates a Python-originated error. bool IsPythonError() const { return code() == StatusCode::PythonError; } - // An object with this object ID already exists in the plasma store. + /// Return true iff the status indicates an already existing Plasma object. bool IsPlasmaObjectExists() const { return code() == StatusCode::PlasmaObjectExists; } - // An object was requested that doesn't exist in the plasma store. + /// Return true iff the status indicates a non-existent Plasma object. bool IsPlasmaObjectNonexistent() const { return code() == StatusCode::PlasmaObjectNonexistent; } - // An already sealed object is tried to be sealed again. + /// Return true iff the status indicates an already sealed Plasma object. bool IsPlasmaObjectAlreadySealed() const { return code() == StatusCode::PlasmaObjectAlreadySealed; } - // An object is too large to fit into the plasma store. + /// Return true iff the status indicates the Plasma store reached its capacity limit. bool IsPlasmaStoreFull() const { return code() == StatusCode::PlasmaStoreFull; } bool IsStillExecuting() const { return code() == StatusCode::StillExecuting; } @@ -251,16 +310,19 @@ class ARROW_EXPORT Status { bool IsExecutionError() const { return code() == StatusCode::ExecutionError; } - // Return a string representation of this status suitable for printing. - // Returns the string "OK" for success. + /// \brief Return a string representation of this status suitable for printing. + /// + /// The string "OK" is returned for success. std::string ToString() const; - // Return a string representation of the status code, without the message - // text or posix code information. + /// \brief Return a string representation of the status code, without the message + /// text or POSIX code information. std::string CodeAsString() const; + /// \brief Return the StatusCode value attached to this status. StatusCode code() const { return ok() ? StatusCode::OK : state_->code; } + /// \brief Return the specific error message attached to this status. std::string message() const { return ok() ? "" : state_->msg; } private: @@ -274,10 +336,10 @@ class ARROW_EXPORT Status { void DeleteState() { delete state_; - state_ = NULL; + state_ = NULLPTR; } void CopyFrom(const Status& s); - void MoveFrom(Status& s); + inline void MoveFrom(Status& s); }; static inline std::ostream& operator<<(std::ostream& os, const Status& x) { @@ -285,16 +347,16 @@ static inline std::ostream& operator<<(std::ostream& os, const Status& x) { return os; } -inline void Status::MoveFrom(Status& s) { +void Status::MoveFrom(Status& s) { delete state_; state_ = s.state_; - s.state_ = NULL; + s.state_ = NULLPTR; } -inline Status::Status(const Status& s) - : state_((s.state_ == NULL) ? NULL : new State(*s.state_)) {} +Status::Status(const Status& s) + : state_((s.state_ == NULLPTR) ? NULLPTR : new State(*s.state_)) {} -inline Status& Status::operator=(const Status& s) { +Status& Status::operator=(const Status& s) { // The following condition catches both aliasing (when this == &s), // and the common case where both s and *this are ok. 
if (state_ != s.state_) { @@ -303,14 +365,17 @@ inline Status& Status::operator=(const Status& s) { return *this; } -inline Status::Status(Status&& s) noexcept : state_(s.state_) { s.state_ = NULL; } +Status::Status(Status&& s) noexcept : state_(s.state_) { s.state_ = NULLPTR; } -inline Status& Status::operator=(Status&& s) noexcept { +Status& Status::operator=(Status&& s) noexcept { MoveFrom(s); return *this; } -inline Status Status::operator&(const Status& s) const noexcept { +/// \cond FALSE +// (note: emits warnings on Doxygen < 1.8.15, +// see https://github.com/doxygen/doxygen/issues/6295) +Status Status::operator&(const Status& s) const noexcept { if (ok()) { return s; } else { @@ -318,7 +383,7 @@ inline Status Status::operator&(const Status& s) const noexcept { } } -inline Status Status::operator&(Status&& s) const noexcept { +Status Status::operator&(Status&& s) const noexcept { if (ok()) { return std::move(s); } else { @@ -326,19 +391,20 @@ inline Status Status::operator&(Status&& s) const noexcept { } } -inline Status& Status::operator&=(const Status& s) noexcept { +Status& Status::operator&=(const Status& s) noexcept { if (ok() && !s.ok()) { CopyFrom(s); } return *this; } -inline Status& Status::operator&=(Status&& s) noexcept { +Status& Status::operator&=(Status&& s) noexcept { if (ok() && !s.ok()) { MoveFrom(s); } return *this; } +/// \endcond } // namespace arrow diff --git a/cpp/src/arrow/stl.h b/cpp/src/arrow/stl.h index 5c632b31751c8..def496bccbc11 100644 --- a/cpp/src/arrow/stl.h +++ b/cpp/src/arrow/stl.h @@ -24,6 +24,7 @@ #include #include "arrow/type.h" +#include "arrow/type_traits.h" namespace arrow { @@ -31,40 +32,6 @@ class Schema; namespace stl { -/// Traits meta class to map standard C/C++ types to equivalent Arrow types. -template -struct ConversionTraits {}; - -#define ARROW_STL_CONVERSION(c_type, ArrowType_) \ - template <> \ - struct ConversionTraits { \ - static std::shared_ptr arrow_type() { \ - return std::make_shared(); \ - } \ - constexpr static bool nullable = false; \ - }; - -ARROW_STL_CONVERSION(bool, BooleanType) -ARROW_STL_CONVERSION(int8_t, Int8Type) -ARROW_STL_CONVERSION(int16_t, Int16Type) -ARROW_STL_CONVERSION(int32_t, Int32Type) -ARROW_STL_CONVERSION(int64_t, Int64Type) -ARROW_STL_CONVERSION(uint8_t, UInt8Type) -ARROW_STL_CONVERSION(uint16_t, UInt16Type) -ARROW_STL_CONVERSION(uint32_t, UInt32Type) -ARROW_STL_CONVERSION(uint64_t, UInt64Type) -ARROW_STL_CONVERSION(float, FloatType) -ARROW_STL_CONVERSION(double, DoubleType) -ARROW_STL_CONVERSION(std::string, StringType) - -template -struct ConversionTraits> { - static std::shared_ptr arrow_type() { - return list(ConversionTraits::arrow_type()); - } - constexpr static bool nullable = false; -}; - /// Build an arrow::Schema based upon the types defined in a std::tuple-like structure. 
/// /// While the type information is available at compile-time, we still need to add the @@ -82,8 +49,8 @@ struct SchemaFromTuple { const std::vector& names) { std::vector> ret = SchemaFromTuple::MakeSchemaRecursion(names); - std::shared_ptr type = ConversionTraits::arrow_type(); - ret.push_back(field(names[N - 1], type, ConversionTraits::nullable)); + std::shared_ptr type = CTypeTraits::type_singleton(); + ret.push_back(field(names[N - 1], type, false /* nullable */)); return ret; } @@ -111,9 +78,8 @@ struct SchemaFromTuple { const NamesTuple& names) { std::vector> ret = SchemaFromTuple::MakeSchemaRecursionT(names); - std::shared_ptr type = ConversionTraits::arrow_type(); - ret.push_back( - field(std::get(names), type, ConversionTraits::nullable)); + std::shared_ptr type = CTypeTraits::type_singleton(); + ret.push_back(field(std::get(names), type, false /* nullable */)); return ret; } diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc index 04af4d9741c71..d232ac35e30c7 100644 --- a/cpp/src/arrow/table.cc +++ b/cpp/src/arrow/table.cc @@ -234,10 +234,8 @@ Status Column::ValidateData() { for (int i = 0; i < data_->num_chunks(); ++i) { std::shared_ptr type = data_->chunk(i)->type(); if (!this->type()->Equals(type)) { - std::stringstream ss; - ss << "In chunk " << i << " expected type " << this->type()->ToString() - << " but saw " << type->ToString(); - return Status::Invalid(ss.str()); + return Status::Invalid("In chunk ", i, " expected type ", this->type()->ToString(), + " but saw ", type->ToString()); } } return Status::OK(); @@ -301,10 +299,9 @@ class SimpleTable : public Table { DCHECK(col != nullptr); if (col->length() != num_rows_) { - std::stringstream ss; - ss << "Added column's length must match table's length. Expected length " - << num_rows_ << " but got length " << col->length(); - return Status::Invalid(ss.str()); + return Status::Invalid( + "Added column's length must match table's length. Expected length ", num_rows_, + " but got length ", col->length()); } std::shared_ptr new_schema; @@ -319,10 +316,9 @@ class SimpleTable : public Table { DCHECK(col != nullptr); if (col->length() != num_rows_) { - std::stringstream ss; - ss << "Added column's length must match table's length. Expected length " - << num_rows_ << " but got length " << col->length(); - return Status::Invalid(ss.str()); + return Status::Invalid( + "Added column's length must match table's length. 
Expected length ", num_rows_,
+          " but got length ", col->length());
     }
 
     std::shared_ptr<Schema> new_schema;
@@ -363,15 +359,11 @@ class SimpleTable : public Table {
     for (int i = 0; i < num_columns(); ++i) {
       const Column* col = columns_[i].get();
       if (col == nullptr) {
-        std::stringstream ss;
-        ss << "Column " << i << " was null";
-        return Status::Invalid(ss.str());
+        return Status::Invalid("Column ", i, " was null");
       }
       if (!col->field()->Equals(*schema_->field(i))) {
-        std::stringstream ss;
-        ss << "Column field " << i << " named " << col->name()
-           << " is inconsistent with schema";
-        return Status::Invalid(ss.str());
+        return Status::Invalid("Column field ", i, " named ", col->name(),
+                               " is inconsistent with schema");
       }
     }
 
@@ -379,10 +371,8 @@
     for (int i = 0; i < num_columns(); ++i) {
       const Column* col = columns_[i].get();
       if (col->length() != num_rows_) {
-        std::stringstream ss;
-        ss << "Column " << i << " named " << col->name() << " expected length "
-           << num_rows_ << " but got length " << col->length();
-        return Status::Invalid(ss.str());
+        return Status::Invalid("Column ", i, " named ", col->name(), " expected length ",
+                               num_rows_, " but got length ", col->length());
       }
     }
     return Status::OK();
@@ -392,7 +382,7 @@
   std::vector<std::shared_ptr<Column>> columns_;
 };
 
-Table::Table() {}
+Table::Table() : num_rows_(0) {}
 
 std::shared_ptr<Table>
Table::Make(const std::shared_ptr& schema, const std::vector>& columns, @@ -414,11 +404,9 @@ Status Table::FromRecordBatches(const std::shared_ptr& schema, for (int i = 0; i < nbatches; ++i) { if (!batches[i]->schema()->Equals(*schema, false)) { - std::stringstream ss; - ss << "Schema at index " << static_cast(i) << " was different: \n" - << schema->ToString() << "\nvs\n" - << batches[i]->schema()->ToString(); - return Status::Invalid(ss.str()); + return Status::Invalid("Schema at index ", static_cast(i), + " was different: \n", schema->ToString(), "\nvs\n", + batches[i]->schema()->ToString()); } } @@ -458,11 +446,9 @@ Status ConcatenateTables(const std::vector>& tables, for (int i = 1; i < ntables; ++i) { if (!tables[i]->schema()->Equals(*schema, false)) { - std::stringstream ss; - ss << "Schema at index " << static_cast(i) << " was different: \n" - << schema->ToString() << "\nvs\n" - << tables[i]->schema()->ToString(); - return Status::Invalid(ss.str()); + return Status::Invalid("Schema at index ", static_cast(i), + " was different: \n", schema->ToString(), "\nvs\n", + tables[i]->schema()->ToString()); } } diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h index 119e4e4491225..2ac34b4cde57d 100644 --- a/cpp/src/arrow/table.h +++ b/cpp/src/arrow/table.h @@ -39,7 +39,19 @@ class Status; /// as one large array class ARROW_EXPORT ChunkedArray { public: + /// \brief Construct a chunked array from a vector of arrays + /// + /// The vector should be non-empty and all its elements should have the same + /// data type. explicit ChunkedArray(const ArrayVector& chunks); + + /// \brief Construct a chunked array from a single Array + explicit ChunkedArray(const std::shared_ptr& chunk) + : ChunkedArray(ArrayVector({chunk})) {} + + /// \brief Construct a chunked array from a vector of arrays and a data type + /// + /// As the data type is passed explicitly, the vector may be empty. ChunkedArray(const ArrayVector& chunks, const std::shared_ptr& type); /// \return the total length of the chunked array; computed on construction @@ -78,7 +90,12 @@ class ARROW_EXPORT ChunkedArray { std::shared_ptr type() const { return type_; } + /// \brief Determine if two chunked arrays are equal. + /// + /// Two chunked arrays can be equal only if they have equal datatypes. + /// However, they may be equal even if they have different chunkings. bool Equals(const ChunkedArray& other) const; + /// \brief Determine if two chunked arrays are equal. bool Equals(const std::shared_ptr& other) const; protected: @@ -96,13 +113,26 @@ class ARROW_EXPORT ChunkedArray { /// metadata) and a chunked data array class ARROW_EXPORT Column { public: + /// \brief Construct a column from a vector of arrays + /// + /// The array chunks' datatype must match the field's datatype. Column(const std::shared_ptr& field, const ArrayVector& chunks); + /// \brief Construct a column from a chunked array + /// + /// The chunked array's datatype must match the field's datatype. Column(const std::shared_ptr& field, const std::shared_ptr& data); - + /// \brief Construct a column from a single array + /// + /// The array's datatype must match the field's datatype. Column(const std::shared_ptr& field, const std::shared_ptr& data); - // Construct from name and array + /// \brief Construct a column from a name and an array + /// + /// A field with the given name and the array's datatype is automatically created. 
Column(const std::string& name, const std::shared_ptr& data); + /// \brief Construct a column from a name and a chunked array + /// + /// A field with the given name and the array's datatype is automatically created. Column(const std::string& name, const std::shared_ptr& data); int64_t length() const { return data_->length(); } @@ -147,7 +177,12 @@ class ARROW_EXPORT Column { /// \param[out] out The resulting vector of arrays Status Flatten(MemoryPool* pool, std::vector>* out) const; + /// \brief Determine if two columns are equal. + /// + /// Two columns can be equal only if they have equal datatypes. + /// However, they may be equal even if they have different chunkings. bool Equals(const Column& other) const; + /// \brief Determine if the two columns are equal. bool Equals(const std::shared_ptr& other) const; /// \brief Verify that the column's array data is consistent with the passed @@ -207,11 +242,10 @@ class ARROW_EXPORT Table { const std::vector>& batches, std::shared_ptr
* table); - /// \return the table's schema + /// Return the table schema std::shared_ptr schema() const { return schema_; } - /// \param[in] i column index, does not boundscheck - /// \return the i-th column + /// Return a column by index virtual std::shared_ptr column(int i) const = 0; /// \brief Remove column from the table, producing a new Table @@ -243,13 +277,16 @@ class ARROW_EXPORT Table { /// \brief Perform any checks to validate the input arguments virtual Status Validate() const = 0; - /// \return the number of columns in the table + /// \brief Return the number of columns in the table int num_columns() const { return schema_->num_fields(); } - /// \return the number of rows (the corresponding length of each column) + /// \brief Return the number of rows (equal to each column's logical length) int64_t num_rows() const { return num_rows_; } - /// \brief Determine if semantic contents of tables are exactly equal + /// \brief Determine if tables are equal + /// + /// Two tables can be equal only if they have equal schemas. + /// However, they may be equal even if they have different chunkings. bool Equals(const Table& other) const; protected: @@ -262,18 +299,25 @@ class ARROW_EXPORT Table { ARROW_DISALLOW_COPY_AND_ASSIGN(Table); }; -/// \brief Compute a sequence of record batches from a (possibly chunked) Table +/// \brief Compute a stream of record batches from a (possibly chunked) Table +/// +/// The conversion is zero-copy: each record batch is a view over a slice +/// of the table's columns. class ARROW_EXPORT TableBatchReader : public RecordBatchReader { public: ~TableBatchReader() override; - /// \brief Read batches with the maximum possible size + /// \brief Construct a TableBatchReader for the given table explicit TableBatchReader(const Table& table); std::shared_ptr schema() const override; Status ReadNext(std::shared_ptr* out) override; + /// \brief Set the desired maximum chunk size of record batches + /// + /// The actual chunk size of each record batch may be smaller, depending + /// on actual chunking characteristics of each table column. void set_chunksize(int64_t chunksize); private: @@ -282,7 +326,10 @@ class ARROW_EXPORT TableBatchReader : public RecordBatchReader { }; /// \brief Construct table from multiple input tables. -/// \return Status, fails if any schemas are different +/// +/// The tables are concatenated vertically. Therefore, all tables should +/// have the same schema. Each column in the output table is the result +/// of concatenating the corresponding columns in all input tables. ARROW_EXPORT Status ConcatenateTables(const std::vector>& tables, std::shared_ptr
* table); diff --git a/cpp/src/arrow/tensor-test.cc b/cpp/src/arrow/tensor-test.cc index a437e6db5adaf..11ea7c2a0ca69 100644 --- a/cpp/src/arrow/tensor-test.cc +++ b/cpp/src/arrow/tensor-test.cc @@ -66,8 +66,11 @@ TEST(TestTensor, BasicCtors) { ASSERT_EQ(strides, t1.strides()); ASSERT_EQ(strides, t2.strides()); + ASSERT_EQ(std::vector({"foo", "bar"}), t3.dim_names()); ASSERT_EQ("foo", t3.dim_name(0)); ASSERT_EQ("bar", t3.dim_name(1)); + + ASSERT_EQ(std::vector({}), t1.dim_names()); ASSERT_EQ("", t1.dim_name(0)); ASSERT_EQ("", t1.dim_name(1)); } @@ -104,13 +107,16 @@ TEST(TestTensor, ZeroDimensionalTensor) { ASSERT_EQ(t.strides().size(), 1); } -TEST(TestNumericTensor, ElementAccess) { +TEST(TestNumericTensor, ElementAccessWithRowMajorStrides) { std::vector shape = {3, 4}; std::vector values_i64 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; std::shared_ptr buffer_i64(Buffer::Wrap(values_i64)); NumericTensor t_i64(buffer_i64, shape); + ASSERT_TRUE(t_i64.is_row_major()); + ASSERT_FALSE(t_i64.is_column_major()); + ASSERT_TRUE(t_i64.is_contiguous()); ASSERT_EQ(1, t_i64.Value({0, 0})); ASSERT_EQ(5, t_i64.Value({1, 0})); ASSERT_EQ(6, t_i64.Value({1, 1})); @@ -121,22 +127,27 @@ TEST(TestNumericTensor, ElementAccess) { std::shared_ptr buffer_f32(Buffer::Wrap(values_f32)); NumericTensor t_f32(buffer_f32, shape); + ASSERT_TRUE(t_f32.is_row_major()); + ASSERT_FALSE(t_f32.is_column_major()); + ASSERT_TRUE(t_f32.is_contiguous()); ASSERT_EQ(1.1f, t_f32.Value({0, 0})); ASSERT_EQ(5.1f, t_f32.Value({1, 0})); ASSERT_EQ(6.1f, t_f32.Value({1, 1})); ASSERT_EQ(11.1f, t_f32.Value({2, 2})); } -TEST(TestNumericTensor, ElementAccessWithRowMajorStrides) { +TEST(TestNumericTensor, ElementAccessWithColumnMajorStrides) { std::vector shape = {3, 4}; const int64_t i64_size = sizeof(int64_t); - std::vector values_i64 = {1, 2, 3, 4, 0, 0, 5, 6, 7, - 8, 0, 0, 9, 10, 11, 12, 0, 0}; - std::vector strides_i64 = {i64_size * 6, i64_size}; + std::vector values_i64 = {1, 5, 9, 2, 6, 10, 3, 7, 11, 4, 8, 12}; + std::vector strides_i64 = {i64_size, i64_size * 3}; std::shared_ptr buffer_i64(Buffer::Wrap(values_i64)); NumericTensor t_i64(buffer_i64, shape, strides_i64); + ASSERT_TRUE(t_i64.is_column_major()); + ASSERT_FALSE(t_i64.is_row_major()); + ASSERT_TRUE(t_i64.is_contiguous()); ASSERT_EQ(1, t_i64.Value({0, 0})); ASSERT_EQ(2, t_i64.Value({0, 1})); ASSERT_EQ(4, t_i64.Value({0, 3})); @@ -145,13 +156,15 @@ TEST(TestNumericTensor, ElementAccessWithRowMajorStrides) { ASSERT_EQ(11, t_i64.Value({2, 2})); const int64_t f32_size = sizeof(float); - std::vector values_f32 = {1.1f, 2.1f, 3.1f, 4.1f, 0.0f, 0.0f, - 5.1f, 6.1f, 7.1f, 8.1f, 0.0f, 0.0f, - 9.1f, 10.1f, 11.1f, 12.1f, 0.0f, 0.0f}; - std::vector strides_f32 = {f32_size * 6, f32_size}; + std::vector values_f32 = {1.1f, 5.1f, 9.1f, 2.1f, 6.1f, 10.1f, + 3.1f, 7.1f, 11.1f, 4.1f, 8.1f, 12.1f}; + std::vector strides_f32 = {f32_size, f32_size * 3}; std::shared_ptr buffer_f32(Buffer::Wrap(values_f32)); NumericTensor t_f32(buffer_f32, shape, strides_f32); + ASSERT_TRUE(t_f32.is_column_major()); + ASSERT_FALSE(t_f32.is_row_major()); + ASSERT_TRUE(t_f32.is_contiguous()); ASSERT_EQ(1.1f, t_f32.Value({0, 0})); ASSERT_EQ(2.1f, t_f32.Value({0, 1})); ASSERT_EQ(4.1f, t_f32.Value({0, 3})); @@ -160,15 +173,19 @@ TEST(TestNumericTensor, ElementAccessWithRowMajorStrides) { ASSERT_EQ(11.1f, t_f32.Value({2, 2})); } -TEST(TestNumericTensor, ElementAccessWithColumnMajorStrides) { +TEST(TestNumericTensor, ElementAccessWithNonContiguousStrides) { std::vector shape = {3, 4}; const int64_t i64_size = sizeof(int64_t); 
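// (Editorial aside: for shape {3, 4} with 8-byte int64 elements, contiguous
// row-major strides are {32, 8}. The rewritten test below uses strides of
// {48, 8}, i.e. six elements of storage per logical four-element row, so the
// tensor is neither contiguous, row-major, nor column-major, and Value({i, j})
// must resolve to byte offset 48 * i + 8 * j.)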
- std::vector values_i64 = {1, 5, 9, 0, 2, 6, 10, 0, 3, 7, 11, 0, 4, 8, 12, 0}; - std::vector strides_i64 = {i64_size, i64_size * 4}; + std::vector values_i64 = {1, 2, 3, 4, 0, 0, 5, 6, 7, + 8, 0, 0, 9, 10, 11, 12, 0, 0}; + std::vector strides_i64 = {i64_size * 6, i64_size}; std::shared_ptr buffer_i64(Buffer::Wrap(values_i64)); NumericTensor t_i64(buffer_i64, shape, strides_i64); + ASSERT_FALSE(t_i64.is_contiguous()); + ASSERT_FALSE(t_i64.is_row_major()); + ASSERT_FALSE(t_i64.is_column_major()); ASSERT_EQ(1, t_i64.Value({0, 0})); ASSERT_EQ(2, t_i64.Value({0, 1})); ASSERT_EQ(4, t_i64.Value({0, 3})); @@ -177,12 +194,16 @@ TEST(TestNumericTensor, ElementAccessWithColumnMajorStrides) { ASSERT_EQ(11, t_i64.Value({2, 2})); const int64_t f32_size = sizeof(float); - std::vector values_f32 = {1.1f, 5.1f, 9.1f, 0.0f, 2.1f, 6.1f, 10.1f, 0.0f, - 3.1f, 7.1f, 11.1f, 0.0f, 4.1f, 8.1f, 12.1f, 0.0f}; - std::vector strides_f32 = {f32_size, f32_size * 4}; + std::vector values_f32 = {1.1f, 2.1f, 3.1f, 4.1f, 0.0f, 0.0f, + 5.1f, 6.1f, 7.1f, 8.1f, 0.0f, 0.0f, + 9.1f, 10.1f, 11.1f, 12.1f, 0.0f, 0.0f}; + std::vector strides_f32 = {f32_size * 6, f32_size}; std::shared_ptr buffer_f32(Buffer::Wrap(values_f32)); NumericTensor t_f32(buffer_f32, shape, strides_f32); + ASSERT_FALSE(t_f32.is_contiguous()); + ASSERT_FALSE(t_f32.is_row_major()); + ASSERT_FALSE(t_f32.is_column_major()); ASSERT_EQ(1.1f, t_f32.Value({0, 0})); ASSERT_EQ(2.1f, t_f32.Value({0, 1})); ASSERT_EQ(4.1f, t_f32.Value({0, 3})); diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc index 589ee995e2181..a4db298a04d90 100644 --- a/cpp/src/arrow/tensor.cc +++ b/cpp/src/arrow/tensor.cc @@ -17,6 +17,7 @@ #include "arrow/tensor.h" +#include #include #include #include @@ -122,50 +123,4 @@ Type::type Tensor::type_id() const { return type_->id(); } bool Tensor::Equals(const Tensor& other) const { return TensorEquals(*this, other); } -// ---------------------------------------------------------------------- -// NumericTensor - -template -NumericTensor::NumericTensor(const std::shared_ptr& data, - const std::vector& shape) - : NumericTensor(data, shape, {}, {}) {} - -template -NumericTensor::NumericTensor(const std::shared_ptr& data, - const std::vector& shape, - const std::vector& strides) - : NumericTensor(data, shape, strides, {}) {} - -template -NumericTensor::NumericTensor(const std::shared_ptr& data, - const std::vector& shape, - const std::vector& strides, - const std::vector& dim_names) - : Tensor(TypeTraits::type_singleton(), data, shape, strides, dim_names) {} - -template -int64_t NumericTensor::CalculateValueOffset( - const std::vector& index) const { - int64_t offset = 0; - for (size_t i = 0; i < index.size(); ++i) { - offset += index[i] * strides_[i]; - } - return offset; -} - -// ---------------------------------------------------------------------- -// Instantiate templates - -template class ARROW_TEMPLATE_EXPORT NumericTensor; -template class ARROW_TEMPLATE_EXPORT NumericTensor; -template class ARROW_TEMPLATE_EXPORT NumericTensor; -template class ARROW_TEMPLATE_EXPORT NumericTensor; -template class ARROW_TEMPLATE_EXPORT NumericTensor; -template class ARROW_TEMPLATE_EXPORT NumericTensor; -template class ARROW_TEMPLATE_EXPORT NumericTensor; -template class ARROW_TEMPLATE_EXPORT NumericTensor; -template class ARROW_TEMPLATE_EXPORT NumericTensor; -template class ARROW_TEMPLATE_EXPORT NumericTensor; -template class ARROW_TEMPLATE_EXPORT NumericTensor; - } // namespace arrow diff --git a/cpp/src/arrow/tensor.h b/cpp/src/arrow/tensor.h index 
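The renamed tests above all pivot on the same rule: an element's byte offset is the dot product of its index with the stride vector, and the stride vector alone decides whether the layout is row-major, column-major, or merely valid-but-non-contiguous (as with the padded {i64_size * 6, i64_size} strides). A minimal standalone sketch of that arithmetic, independent of the Arrow API:

#include <cstdint>
#include <iostream>
#include <vector>

// Byte offset of `index` under `strides` -- the same computation
// NumericTensor::CalculateValueOffset performs.
int64_t Offset(const std::vector<int64_t>& index, const std::vector<int64_t>& strides) {
  int64_t offset = 0;
  for (size_t i = 0; i < index.size(); ++i) offset += index[i] * strides[i];
  return offset;
}

int main() {
  const int64_t w = sizeof(int64_t);            // 8 bytes per element
  std::vector<int64_t> row_major = {4 * w, w};  // shape {3, 4}, packed by rows
  std::vector<int64_t> col_major = {w, 3 * w};  // shape {3, 4}, packed by columns
  // Element {1, 2}: 1 * 32 + 2 * 8 = 48 bytes (row-major); 1 * 8 + 2 * 24 = 56 (col-major)
  std::cout << Offset({1, 2}, row_major) << " " << Offset({1, 2}, col_major) << "\n";
  return 0;
}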
diff --git a/cpp/src/arrow/tensor.h b/cpp/src/arrow/tensor.h
index a9b5df81fa193..fb2093b915730 100644
--- a/cpp/src/arrow/tensor.h
+++ b/cpp/src/arrow/tensor.h
@@ -25,6 +25,7 @@
 #include "arrow/buffer.h"
 #include "arrow/type.h"
+#include "arrow/type_traits.h"
 #include "arrow/util/macros.h"
 #include "arrow/util/visibility.h"
@@ -50,6 +51,9 @@ static inline bool is_tensor_supported(Type::type type_id) {
   return false;
 }
 
+template <typename SparseIndexType>
+class SparseTensorImpl;
+
 class ARROW_EXPORT Tensor {
  public:
   virtual ~Tensor() = default;
@@ -78,6 +82,7 @@ class ARROW_EXPORT Tensor {
 
   int ndim() const { return static_cast<int>(shape_.size()); }
 
+  const std::vector<std::string>& dim_names() const { return dim_names_; }
   const std::string& dim_name(int i) const;
 
   /// Total number of value cells in the tensor
@@ -110,27 +115,33 @@ class ARROW_EXPORT Tensor {
   /// These names are optional
   std::vector<std::string> dim_names_;
 
+  template <typename SparseIndexType>
+  friend class SparseTensorImpl;
+
  private:
   ARROW_DISALLOW_COPY_AND_ASSIGN(Tensor);
 };
 
 template <typename TYPE>
-class ARROW_EXPORT NumericTensor : public Tensor {
+class NumericTensor : public Tensor {
  public:
   using TypeClass = TYPE;
   using value_type = typename TypeClass::c_type;
 
+  /// Constructor with non-negative strides and dimension names
+  NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
+                const std::vector<int64_t>& strides,
+                const std::vector<std::string>& dim_names)
+      : Tensor(TypeTraits<TYPE>::type_singleton(), data, shape, strides, dim_names) {}
+
   /// Constructor with no dimension names or strides, data assumed to be row-major
-  NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape);
+  NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape)
+      : NumericTensor(data, shape, {}, {}) {}
 
   /// Constructor with non-negative strides
   NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
-                const std::vector<int64_t>& strides);
-
-  /// Constructor with non-negative strides and dimension names
-  NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
-                const std::vector<int64_t>& strides,
-                const std::vector<std::string>& dim_names);
+                const std::vector<int64_t>& strides)
+      : NumericTensor(data, shape, strides, {}) {}
 
   const value_type& Value(const std::vector<int64_t>& index) const {
     int64_t offset = CalculateValueOffset(index);
@@ -139,7 +150,13 @@ class ARROW_EXPORT NumericTensor : public Tensor {
   }
 
  protected:
-  int64_t CalculateValueOffset(const std::vector<int64_t>& index) const;
+  int64_t CalculateValueOffset(const std::vector<int64_t>& index) const {
+    int64_t offset = 0;
+    for (size_t i = 0; i < index.size(); ++i) {
+      offset += index[i] * strides_[i];
+    }
+    return offset;
+  }
 };
 
 }  // namespace arrow
diff --git a/cpp/src/arrow/test-util.cc b/cpp/src/arrow/test-util.cc
index 84c76ee1aee84..617c53978f619 100644
--- a/cpp/src/arrow/test-util.cc
+++ b/cpp/src/arrow/test-util.cc
@@ -18,13 +18,12 @@
 #include "arrow/test-util.h"
 
 #ifndef _WIN32
-#include
-#include
-#include
+#include  // IWYU pragma: keep
+#include  // IWYU pragma: keep
+#include  // IWYU pragma: keep
 #endif
 
 #include
-#include
 #include
 #include
 #include
@@ -33,34 +32,31 @@
 #include
 #include
 #include
-#include
 #include
 #include
 
 #include "arrow/array.h"
 #include "arrow/buffer.h"
-#include "arrow/builder.h"
-#include "arrow/memory_pool.h"
+#include "arrow/ipc/json-simple.h"
 #include "arrow/pretty_print.h"
 #include "arrow/status.h"
 #include "arrow/table.h"
 #include "arrow/type.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/bit-util.h"
-#include "arrow/util/decimal.h"
 #include "arrow/util/logging.h"
 
-void sleep_for(double seconds) {
-  std::this_thread::sleep_for(
-      std::chrono::nanoseconds(static_cast<int64_t>(seconds * 1e9)));
-}
-
 namespace arrow {
 
+std::shared_ptr<Array> ArrayFromJSON(const std::shared_ptr<DataType>& type,
+                                     const std::string& json) {
+  std::shared_ptr<Array> out;
+  ABORT_NOT_OK(ipc::internal::json::ArrayFromJSON(type, json, &out));
+  return out;
+}
+
 void random_null_bytes(int64_t n, double pct_null, uint8_t* null_bytes) {
   const int random_seed = 0;
-  std::mt19937 gen(random_seed);
+  std::default_random_engine gen(random_seed);
   std::uniform_real_distribution<double> d(0.0, 1.0);
   std::generate(null_bytes, null_bytes + n,
                 [&d, &gen, &pct_null] { return d(gen) > pct_null; });
@@ -68,7 +64,7 @@ void random_null_bytes(int64_t n, double pct_null, uint8_t* null_bytes) {
 
 void random_is_valid(int64_t n, double pct_null, std::vector<bool>* is_valid) {
   const int random_seed = 0;
-  std::mt19937 gen(random_seed);
+  std::default_random_engine gen(random_seed);
   std::uniform_real_distribution<double> d(0.0, 1.0);
   is_valid->resize(n, false);
   std::generate(is_valid->begin(), is_valid->end(),
@@ -76,7 +72,7 @@ void random_is_valid(int64_t n, double pct_null, std::vector<bool>* is_valid) {
 }
 
 void random_bytes(int64_t n, uint32_t seed, uint8_t* out) {
-  std::mt19937 gen(seed);
+  std::default_random_engine gen(seed);
   std::uniform_int_distribution<uint32_t> d(0, std::numeric_limits<uint8_t>::max());
   std::generate(out, out + n, [&d, &gen] { return static_cast<uint8_t>(d(gen)); });
 }
@@ -150,7 +146,7 @@ int32_t DecimalSize(int32_t precision) {
 }
 
 void random_decimals(int64_t n, uint32_t seed, int32_t precision, uint8_t* out) {
-  std::mt19937 gen(seed);
+  std::default_random_engine gen(seed);
   std::uniform_int_distribution<uint32_t> d(0, std::numeric_limits<uint8_t>::max());
   const int32_t required_bytes = DecimalSize(precision);
   constexpr int32_t byte_width = 16;
@@ -307,17 +303,23 @@ void AssertZeroPadded(const Array& array) {
   for (const auto& buffer : array.data()->buffers) {
     if (buffer) {
       const int64_t padding = buffer->capacity() - buffer->size();
-      std::vector<uint8_t> zeros(padding);
-      ASSERT_EQ(0, memcmp(buffer->data() + buffer->size(), zeros.data(), padding));
+      if (padding > 0) {
+        std::vector<uint8_t> zeros(padding);
+        ASSERT_EQ(0, memcmp(buffer->data() + buffer->size(), zeros.data(), padding));
+      }
     }
   }
 }
 
 void TestInitialized(const Array& array) {
   for (const auto& buffer : array.data()->buffers) {
-    if (buffer) {
-      std::vector<uint8_t> zeros(buffer->capacity());
-      throw_away = memcmp(buffer->data(), zeros.data(), buffer->size());
+    if (buffer && buffer->capacity() > 0) {
+      int total = 0;
+      auto data = buffer->data();
+      for (int64_t i = 0; i < buffer->size(); ++i) {
+        total ^= data[i];
+      }
+      throw_away = total;
     }
   }
 }
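The new ArrayFromJSON helper gives tests a compact way to build arrays from literal data. A hypothetical test fragment (the type and values are chosen for illustration; exactly which JSON forms are accepted depends on the json-simple parser wired in above):

// Inside a gtest test body, with the arrow namespace in scope:
auto arr = ArrayFromJSON(int64(), "[1, 2, null, 4]");
ASSERT_EQ(arr->length(), 4);
ASSERT_EQ(arr->null_count(), 1);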
diff --git a/cpp/src/arrow/test-util.h b/cpp/src/arrow/test-util.h
index 3011f287f096a..713ff38ca5283 100644
--- a/cpp/src/arrow/test-util.h
+++ b/cpp/src/arrow/test-util.h
@@ -15,26 +15,19 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#ifndef ARROW_TEST_UTIL_H_
-#define ARROW_TEST_UTIL_H_
-
-#ifndef _WIN32
-#include
-#include
-#include
-#endif
+#pragma once
 
 #include
-#include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
 #include
 #include
-#include
+#include
 
 #include
 #include
@@ -44,44 +37,42 @@
 #include "arrow/builder.h"
 #include "arrow/memory_pool.h"
 #include "arrow/pretty_print.h"
+#include "arrow/record_batch.h"
 #include "arrow/status.h"
-#include "arrow/table.h"
 #include "arrow/type.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/bit-util.h"
-#include "arrow/util/decimal.h"
 #include "arrow/util/logging.h"
+#include "arrow/util/macros.h"
 #include "arrow/util/visibility.h"
 
-#define STRINGIFY(x) #x
-
-#define ASSERT_RAISES(ENUM, expr)                                         \
-  do {                                                                    \
-    ::arrow::Status s = (expr);                                           \
-    if (!s.Is##ENUM()) {                                                  \
-      FAIL() << "Expected '" STRINGIFY(expr) "' to fail with " STRINGIFY( \
-                    ENUM) ", but got "                                    \
-             << s.ToString();                                             \
-    }                                                                     \
+#define ASSERT_RAISES(ENUM, expr)                                                     \
+  do {                                                                                \
+    ::arrow::Status s = (expr);                                                       \
+    if (!s.Is##ENUM()) {                                                              \
+      FAIL() << "Expected '" ARROW_STRINGIFY(expr) "' to fail with " ARROW_STRINGIFY( \
+                    ENUM) ", but got "                                                \
+             << s.ToString();                                                         \
+    }                                                                                 \
   } while (false)
 
-#define ASSERT_RAISES_WITH_MESSAGE(ENUM, message, expr)                   \
-  do {                                                                    \
-    ::arrow::Status s = (expr);                                           \
-    if (!s.Is##ENUM()) {                                                  \
-      FAIL() << "Expected '" STRINGIFY(expr) "' to fail with " STRINGIFY( \
-                    ENUM) ", but got "                                    \
-             << s.ToString();                                             \
-    }                                                                     \
-    ASSERT_EQ((message), s.ToString());                                   \
+#define ASSERT_RAISES_WITH_MESSAGE(ENUM, message, expr)                               \
+  do {                                                                                \
+    ::arrow::Status s = (expr);                                                       \
+    if (!s.Is##ENUM()) {                                                              \
+      FAIL() << "Expected '" ARROW_STRINGIFY(expr) "' to fail with " ARROW_STRINGIFY( \
+                    ENUM) ", but got "                                                \
+             << s.ToString();                                                         \
+    }                                                                                 \
+    ASSERT_EQ((message), s.ToString());                                               \
   } while (false)
 
-#define ASSERT_OK(expr)                                            \
-  do {                                                             \
-    ::arrow::Status s = (expr);                                    \
-    if (!s.ok()) {                                                 \
-      FAIL() << "'" STRINGIFY(expr) "' failed with " << s.ToString(); \
-    }                                                              \
+#define ASSERT_OK(expr)                                                       \
+  do {                                                                        \
+    ::arrow::Status _s = (expr);                                              \
+    if (!_s.ok()) {                                                           \
+      FAIL() << "'" ARROW_STRINGIFY(expr) "' failed with " << _s.ToString();  \
+    }                                                                         \
  } while (false)
 
 #define ASSERT_OK_NO_THROW(expr) ASSERT_NO_THROW(ASSERT_OK(expr))
@@ -103,6 +94,10 @@
 
 namespace arrow {
 
+class ChunkedArray;
+class Column;
+class Table;
+
 using ArrayVector = std::vector<std::shared_ptr<Array>>;
 
 #define ASSERT_ARRAYS_EQUAL(LEFT, RIGHT) \
@@ -120,7 +115,7 @@ using ArrayVector = std::vector<std::shared_ptr<Array>>;
 template <typename T>
 void randint(int64_t N, T lower, T upper, std::vector<T>* out) {
   const int random_seed = 0;
-  std::mt19937 gen(random_seed);
+  std::default_random_engine gen(random_seed);
   std::uniform_int_distribution<T> d(lower, upper);
   out->resize(N, static_cast<T>(0));
   std::generate(out->begin(), out->end(), [&d, &gen] { return static_cast<T>(d(gen)); });
@@ -129,7 +124,7 @@ void randint(int64_t N, T lower, T upper, std::vector<T>* out) {
 template <typename T>
 void random_real(int64_t n, uint32_t seed, T min_value, T max_value,
                  std::vector<T>* out) {
-  std::mt19937 gen(seed);
+  std::default_random_engine gen(seed);
   std::uniform_real_distribution<T> d(min_value, max_value);
   out->resize(n, static_cast<T>(0));
   std::generate(out->begin(), out->end(), [&d, &gen] { return static_cast<T>(d(gen)); });
@@ -170,6 +165,12 @@ static inline Status GetBitmapFromVector(const std::vector<T>& is_valid,
   return Status::OK();
 }
 
+template <typename T>
+inline void BitmapFromVector(const std::vector<T>& is_valid,
+                             std::shared_ptr<Buffer>* out) {
+  ASSERT_OK(GetBitmapFromVector(is_valid, out));
+}
+
 // Sets approximately pct_null of the first n bytes in null_bytes to zero
 // and the rest to non-zero (true) values.
 ARROW_EXPORT void random_null_bytes(int64_t n, double pct_null, uint8_t* null_bytes);
@@ -201,6 +202,15 @@ ARROW_EXPORT void PrintColumn(const Column& col, std::stringstream* ss);
 ARROW_EXPORT void AssertTablesEqual(const Table& expected, const Table& actual,
                                     bool same_chunk_layout = true);
 
+template <typename C_TYPE>
+void AssertNumericDataEqual(const C_TYPE* raw_data,
+                            const std::vector<C_TYPE>& expected_values) {
+  for (auto expected : expected_values) {
+    ASSERT_EQ(expected, *raw_data);
+    ++raw_data;
+  }
+}
+
 ARROW_EXPORT void CompareBatch(const RecordBatch& left, const RecordBatch& right);
 
 // Check if the padding of the buffers of the array is zero.
@@ -221,7 +231,7 @@ void FinishAndCheckPadding(BuilderType* builder, std::shared_ptr<Array>* out) {
 template <typename T, typename U>
 void rand_uniform_int(int64_t n, uint32_t seed, T min_value, T max_value, U* out) {
   DCHECK(out || (n == 0));
-  std::mt19937 gen(seed);
+  std::default_random_engine gen(seed);
   std::uniform_int_distribution<T> d(min_value, max_value);
   std::generate(out, out + n, [&d, &gen] { return static_cast<U>(d(gen)); });
 }
@@ -248,6 +258,12 @@ Status MakeRandomBuffer(int64_t length, MemoryPool* pool,
   return Status::OK();
 }
 
+// ArrayFromJSON: construct an Array from a simple JSON representation
+
+ARROW_EXPORT
+std::shared_ptr<Array> ArrayFromJSON(const std::shared_ptr<DataType>&,
+                                     const std::string& json);
+
 // ArrayFromVector: construct an Array from vectors of C values
 
 template <typename TYPE, typename C_TYPE>
@@ -409,5 +425,3 @@ class BatchIterator : public RecordBatchReader {
 };
 
 }  // namespace arrow
-
-#endif  // ARROW_TEST_UTIL_H_
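The new AssertNumericDataEqual template walks a raw value buffer against an expected vector. A hypothetical use, combining it with the ArrayFromJSON declaration above (the array contents are invented for illustration):

// Inside a gtest test body: compare the raw buffer behind an Int32Array.
std::vector<int32_t> expected = {1, 2, 3};
auto arr = ArrayFromJSON(int32(), "[1, 2, 3]");
const auto& typed = static_cast<const Int32Array&>(*arr);
AssertNumericDataEqual(typed.raw_values(), expected);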
diff --git a/cpp/src/arrow/type-test.cc b/cpp/src/arrow/type-test.cc
index e0a10690c2c77..1bacbc937d5c6 100644
--- a/cpp/src/arrow/type-test.cc
+++ b/cpp/src/arrow/type-test.cc
@@ -24,7 +24,10 @@
 
 #include
 
+#include "arrow/memory_pool.h"
+#include "arrow/test-util.h"
 #include "arrow/type.h"
+#include "arrow/type_traits.h"
 #include "arrow/util/checked_cast.h"
 
 using std::shared_ptr;
@@ -56,6 +59,7 @@ TEST(TestField, Equals) {
   ASSERT_TRUE(f0.Equals(f0_other));
   ASSERT_FALSE(f0.Equals(f0_nn));
   ASSERT_FALSE(f0.Equals(f0_with_meta));
+  ASSERT_TRUE(f0.Equals(f0_with_meta, false));
 }
 
 TEST(TestField, TestMetadataConstruction) {
@@ -198,28 +202,31 @@ TEST_F(TestSchema, GetFieldIndex) {
 }
 
 TEST_F(TestSchema, TestMetadataConstruction) {
-  auto f0 = field("f0", int32());
-  auto f1 = field("f1", uint8(), false);
-  auto f2 = field("f2", utf8());
   auto metadata0 = key_value_metadata({{"foo", "bar"}, {"bizz", "buzz"}});
   auto metadata1 = key_value_metadata({{"foo", "baz"}});
-  auto schema0 = ::arrow::schema({f0, f1, f2}, metadata0);
-  ASSERT_TRUE(metadata0->Equals(*schema0->metadata()));
+  auto f0 = field("f0", int32());
+  auto f1 = field("f1", uint8(), false);
+  auto f2 = field("f2", utf8(), true);
+  auto f3 = field("f2", utf8(), true, metadata1->Copy());
+  auto schema0 = ::arrow::schema({f0, f1, f2}, metadata0);
   auto schema1 = ::arrow::schema({f0, f1, f2}, metadata1);
-  ASSERT_TRUE(metadata1->Equals(*schema1->metadata()));
   auto schema2 = ::arrow::schema({f0, f1, f2}, metadata0->Copy());
-  ASSERT_TRUE(metadata0->Equals(*schema2->metadata()));
+  auto schema3 = ::arrow::schema({f0, f1, f3}, metadata0->Copy());
 
+  ASSERT_TRUE(metadata0->Equals(*schema0->metadata()));
+  ASSERT_TRUE(metadata1->Equals(*schema1->metadata()));
+  ASSERT_TRUE(metadata0->Equals(*schema2->metadata()));
 
   ASSERT_TRUE(schema0->Equals(*schema2));
   ASSERT_FALSE(schema0->Equals(*schema1));
   ASSERT_FALSE(schema2->Equals(*schema1));
+  ASSERT_FALSE(schema2->Equals(*schema3));
 
   // don't check metadata
   ASSERT_TRUE(schema0->Equals(*schema1, false));
   ASSERT_TRUE(schema2->Equals(*schema1, false));
+  ASSERT_TRUE(schema2->Equals(*schema3, false));
 }
 
 TEST_F(TestSchema, TestAddMetadata) {
@@ -248,27 +255,34 @@ TEST_F(TestSchema, TestRemoveMetadata) {
   ASSERT_TRUE(new_schema->metadata() == nullptr);
 }
 
-#define PRIMITIVE_TEST(KLASS, ENUM, NAME)        \
-  TEST(TypesTest, TestPrimitive_##ENUM) {        \
-    KLASS tp;                                    \
-                                                 \
-    ASSERT_EQ(tp.id(), Type::ENUM);              \
-    ASSERT_EQ(tp.ToString(), std::string(NAME)); \
+#define PRIMITIVE_TEST(KLASS, CTYPE, ENUM, NAME)                               \
+  TEST(TypesTest, ARROW_CONCAT(TestPrimitive_, ENUM)) {                        \
+    KLASS tp;                                                                  \
+                                                                               \
+    ASSERT_EQ(tp.id(), Type::ENUM);                                            \
+    ASSERT_EQ(tp.ToString(), std::string(NAME));                               \
+                                                                               \
+    using CType = TypeTraits<KLASS>::CType;                                    \
+    static_assert(std::is_same<CType, CTYPE>::value, "Not the same c-type!");  \
+                                                                               \
+    using DerivedArrowType = CTypeTraits<CTYPE>::ArrowType;                    \
+    static_assert(std::is_same<DerivedArrowType, KLASS>::value,                \
+                  "Not the same arrow-type!");                                 \
   }
 
-PRIMITIVE_TEST(Int8Type, INT8, "int8");
-PRIMITIVE_TEST(Int16Type, INT16, "int16");
-PRIMITIVE_TEST(Int32Type, INT32, "int32");
-PRIMITIVE_TEST(Int64Type, INT64, "int64");
-PRIMITIVE_TEST(UInt8Type, UINT8, "uint8");
-PRIMITIVE_TEST(UInt16Type, UINT16, "uint16");
-PRIMITIVE_TEST(UInt32Type, UINT32, "uint32");
-PRIMITIVE_TEST(UInt64Type, UINT64, "uint64");
+PRIMITIVE_TEST(Int8Type, int8_t, INT8, "int8");
+PRIMITIVE_TEST(Int16Type, int16_t, INT16, "int16");
+PRIMITIVE_TEST(Int32Type, int32_t, INT32, "int32");
+PRIMITIVE_TEST(Int64Type, int64_t, INT64, "int64");
+PRIMITIVE_TEST(UInt8Type, uint8_t, UINT8, "uint8");
+PRIMITIVE_TEST(UInt16Type, uint16_t, UINT16, "uint16");
+PRIMITIVE_TEST(UInt32Type, uint32_t, UINT32, "uint32");
+PRIMITIVE_TEST(UInt64Type, uint64_t, UINT64, "uint64");
 
-PRIMITIVE_TEST(FloatType, FLOAT, "float");
-PRIMITIVE_TEST(DoubleType, DOUBLE, "double");
+PRIMITIVE_TEST(FloatType, float, FLOAT, "float");
+PRIMITIVE_TEST(DoubleType, double, DOUBLE, "double");
 
-PRIMITIVE_TEST(BooleanType, BOOL, "bool");
+PRIMITIVE_TEST(BooleanType, bool, BOOL, "bool");
 
 TEST(TestBinaryType, ToString) {
   BinaryType t1;
@@ -446,7 +460,7 @@ TEST(TestStructType, Basics) {
   // TODO(wesm): out of bounds for field(...)
 }
 
-TEST(TestStructType, GetChildByName) {
+TEST(TestStructType, GetFieldByName) {
   auto f0 = field("f0", int32());
   auto f1 = field("f1", uint8(), false);
   auto f2 = field("f2", utf8());
@@ -455,17 +469,17 @@
   StructType struct_type({f0, f1, f2, f3});
   std::shared_ptr<Field> result;
 
-  result = struct_type.GetChildByName("f1");
+  result = struct_type.GetFieldByName("f1");
   ASSERT_EQ(f1, result);
 
-  result = struct_type.GetChildByName("f3");
+  result = struct_type.GetFieldByName("f3");
   ASSERT_EQ(f3, result);
 
-  result = struct_type.GetChildByName("not-found");
+  result = struct_type.GetFieldByName("not-found");
   ASSERT_EQ(result, nullptr);
 }
 
-TEST(TestStructType, GetChildIndex) {
+TEST(TestStructType, GetFieldIndex) {
   auto f0 = field("f0", int32());
   auto f1 = field("f1", uint8(), false);
   auto f2 = field("f2", utf8());
@@ -473,11 +487,147 @@
   StructType struct_type({f0, f1, f2, f3});
 
-  ASSERT_EQ(0, struct_type.GetChildIndex(f0->name()));
-  ASSERT_EQ(1, struct_type.GetChildIndex(f1->name()));
-  ASSERT_EQ(2, struct_type.GetChildIndex(f2->name()));
-  ASSERT_EQ(3, struct_type.GetChildIndex(f3->name()));
-  ASSERT_EQ(-1, struct_type.GetChildIndex("not-found"));
+  ASSERT_EQ(0, struct_type.GetFieldIndex(f0->name()));
+  ASSERT_EQ(1, struct_type.GetFieldIndex(f1->name()));
+  ASSERT_EQ(2, struct_type.GetFieldIndex(f2->name()));
+  ASSERT_EQ(3, struct_type.GetFieldIndex(f3->name()));
+  ASSERT_EQ(-1, struct_type.GetFieldIndex("not-found"));
+}
+
+TEST(TestStructType, GetFieldIndexDuplicates) {
+  auto f0 = field("f0", int32());
+  auto f1 = field("f1", int64());
+  auto f2 = field("f1", utf8());
+  StructType struct_type({f0, f1, f2});
+
+  ASSERT_EQ(0, struct_type.GetFieldIndex("f0"));
+  ASSERT_EQ(-1, struct_type.GetFieldIndex("f1"));
+}
+
+TEST(TestDictionaryType, Equals) {
+  auto t1 = dictionary(int8(), ArrayFromJSON(int32(), "[3, 4, 5, 6]"));
+  auto t2 = dictionary(int8(), ArrayFromJSON(int32(), "[3, 4, 5, 6]"));
+  auto t3 = dictionary(int16(), ArrayFromJSON(int32(), "[3, 4, 5, 6]"));
+  auto t4 = dictionary(int8(), ArrayFromJSON(int16(), "[3, 4, 5, 6]"));
+  auto t5 = dictionary(int8(), ArrayFromJSON(int32(), "[3, 4, 7, 6]"));
+
+  ASSERT_TRUE(t1->Equals(t2));
+  // Different index type
+  ASSERT_FALSE(t1->Equals(t3));
+  // Different value type
+  ASSERT_FALSE(t1->Equals(t4));
+  // Different values
+  ASSERT_FALSE(t1->Equals(t5));
+}
+
+TEST(TestDictionaryType, UnifyNumeric) {
+  auto t1 = dictionary(int8(), ArrayFromJSON(int64(), "[3, 4, 7]"));
+  auto t2 = dictionary(int8(), ArrayFromJSON(int64(), "[1, 7, 4, 8]"));
+  auto t3 = dictionary(int8(), ArrayFromJSON(int64(), "[1, -200]"));
+
+  auto expected = dictionary(int8(), ArrayFromJSON(int64(), "[3, 4, 7, 1, 8, -200]"));
+
+  std::shared_ptr<DataType> dict_type;
+  ASSERT_OK(DictionaryType::Unify(default_memory_pool(), {t1.get(), t2.get(), t3.get()},
+                                  &dict_type));
+  ASSERT_TRUE(dict_type->Equals(expected));
+
+  std::vector<std::vector<int32_t>> transpose_maps;
+  ASSERT_OK(DictionaryType::Unify(default_memory_pool(), {t1.get(), t2.get(), t3.get()},
+                                  &dict_type, &transpose_maps));
+  ASSERT_TRUE(dict_type->Equals(expected));
+  ASSERT_EQ(transpose_maps.size(), 3);
+  ASSERT_EQ(transpose_maps[0], std::vector<int32_t>({0, 1, 2}));
+  ASSERT_EQ(transpose_maps[1], std::vector<int32_t>({3, 2, 1, 4}));
+  ASSERT_EQ(transpose_maps[2], std::vector<int32_t>({3, 5}));
+}
+
+TEST(TestDictionaryType, UnifyString) {
+  auto t1 = dictionary(int16(), ArrayFromJSON(utf8(), "[\"foo\", \"bar\"]"));
+  auto t2 = dictionary(int32(), ArrayFromJSON(utf8(), "[\"quux\", \"foo\"]"));
+
+  auto expected =
+      dictionary(int8(), ArrayFromJSON(utf8(), "[\"foo\", \"bar\", \"quux\"]"));
+
+  std::shared_ptr<DataType> dict_type;
+  ASSERT_OK(
+      DictionaryType::Unify(default_memory_pool(), {t1.get(), t2.get()}, &dict_type));
+  ASSERT_TRUE(dict_type->Equals(expected));
+
+  std::vector<std::vector<int32_t>> transpose_maps;
+  ASSERT_OK(DictionaryType::Unify(default_memory_pool(), {t1.get(), t2.get()}, &dict_type,
+                                  &transpose_maps));
+  ASSERT_TRUE(dict_type->Equals(expected));
+
+  ASSERT_EQ(transpose_maps.size(), 2);
+  ASSERT_EQ(transpose_maps[0], std::vector<int32_t>({0, 1}));
+  ASSERT_EQ(transpose_maps[1], std::vector<int32_t>({2, 0}));
+}
+
+TEST(TestDictionaryType, UnifyFixedSizeBinary) {
+  auto type = fixed_size_binary(3);
+
+  std::string data = "foobarbazqux";
+  auto buf = std::make_shared<Buffer>(data);
+  // ["foo", "bar"]
+  auto dict1 = std::make_shared<FixedSizeBinaryArray>(type, 2, SliceBuffer(buf, 0, 6));
+  auto t1 = dictionary(int16(), dict1);
+  // ["bar", "baz", "qux"]
+  auto dict2 = std::make_shared<FixedSizeBinaryArray>(type, 3, SliceBuffer(buf, 3, 9));
+  auto t2 = dictionary(int16(), dict2);
+
+  // ["foo", "bar", "baz", "qux"]
+  auto expected_dict = std::make_shared<FixedSizeBinaryArray>(type, 4, buf);
+  auto expected = dictionary(int8(), expected_dict);
+
+  std::shared_ptr<DataType> dict_type;
+  ASSERT_OK(
+      DictionaryType::Unify(default_memory_pool(), {t1.get(), t2.get()}, &dict_type));
+  ASSERT_TRUE(dict_type->Equals(expected));
+
+  std::vector<std::vector<int32_t>> transpose_maps;
+  ASSERT_OK(DictionaryType::Unify(default_memory_pool(), {t1.get(), t2.get()}, &dict_type,
+                                  &transpose_maps));
+  ASSERT_TRUE(dict_type->Equals(expected));
+  ASSERT_EQ(transpose_maps.size(), 2);
+  ASSERT_EQ(transpose_maps[0], std::vector<int32_t>({0, 1}));
+  ASSERT_EQ(transpose_maps[1], std::vector<int32_t>({1, 2, 3}));
+}
+
+TEST(TestDictionaryType, UnifyLarge) {
+  // Unifying "large" dictionary types should choose the right index type
+  std::shared_ptr<Array> dict1, dict2, expected_dict;
+
+  Int32Builder builder;
+  ASSERT_OK(builder.Reserve(120));
+  for (int32_t i = 0; i < 120; ++i) {
+    builder.UnsafeAppend(i);
+  }
+  ASSERT_OK(builder.Finish(&dict1));
+  ASSERT_EQ(dict1->length(), 120);
+  auto t1 = dictionary(int8(), dict1);
+
+  ASSERT_OK(builder.Reserve(30));
+  for (int32_t i = 110; i < 140; ++i) {
+    builder.UnsafeAppend(i);
+  }
+  ASSERT_OK(builder.Finish(&dict2));
+  ASSERT_EQ(dict2->length(), 30);
+  auto t2 = dictionary(int8(), dict2);
+
+  ASSERT_OK(builder.Reserve(140));
+  for (int32_t i = 0; i < 140; ++i) {
+    builder.UnsafeAppend(i);
+  }
+  ASSERT_OK(builder.Finish(&expected_dict));
+  ASSERT_EQ(expected_dict->length(), 140);
+  // int8 would be too narrow to hold all possible index values
+  auto expected = dictionary(int16(), expected_dict);
+
+  std::shared_ptr<DataType> dict_type;
+  ASSERT_OK(
+      DictionaryType::Unify(default_memory_pool(), {t1.get(), t2.get()}, &dict_type));
+  ASSERT_TRUE(dict_type->Equals(expected));
 }
 
 TEST(TypesTest, TestDecimal128Small) {
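A transpose map, as exercised in UnifyString above, is simply a lookup table from an input dictionary's indices to the unified dictionary's indices. A short sketch of applying one (the index data is invented; the map {2, 0} is taken from the test, where t2's dictionary ["quux", "foo"] unifies into ["foo", "bar", "quux"]):

// Re-encode indices of t2 against the unified dictionary.
std::vector<int32_t> transpose = {2, 0};          // old index -> new index
std::vector<int32_t> old_indices = {0, 1, 1, 0};  // hypothetical encoded column
std::vector<int32_t> new_indices;
for (int32_t i : old_indices) new_indices.push_back(transpose[i]);
// new_indices is now {2, 0, 0, 2}, valid against ["foo", "bar", "quux"].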
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index 5f1ca8d7b0f09..15f353d4d1f6a 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -65,13 +65,15 @@ std::vector<std::shared_ptr<Field>> Field::Flatten() const {
   return flattened;
 }
 
-bool Field::Equals(const Field& other) const {
+bool Field::Equals(const Field& other, bool check_metadata) const {
   if (this == &other) {
     return true;
   }
   if (this->name_ == other.name_ && this->nullable_ == other.nullable_ &&
       this->type_->Equals(*other.type_.get())) {
-    if (this->HasMetadata() && other.HasMetadata()) {
+    if (!check_metadata) {
+      return true;
+    } else if (this->HasMetadata() && other.HasMetadata()) {
       return metadata_->Equals(*other.metadata_);
     } else if (!this->HasMetadata() && !other.HasMetadata()) {
       return true;
@@ -82,8 +84,8 @@ bool Field::Equals(const Field& other) const {
   return false;
 }
 
-bool Field::Equals(const std::shared_ptr<Field>& other) const {
-  return Equals(*other.get());
+bool Field::Equals(const std::shared_ptr<Field>& other, bool check_metadata) const {
+  return Equals(*other.get(), check_metadata);
 }
 
 std::string Field::ToString() const {
@@ -135,12 +137,11 @@ std::string FixedSizeBinaryType::ToString() const {
 // ----------------------------------------------------------------------
 // Date types
 
-DateType::DateType(Type::type type_id, DateUnit unit)
-    : FixedWidthType(type_id), unit_(unit) {}
+DateType::DateType(Type::type type_id) : FixedWidthType(type_id) {}
 
-Date32Type::Date32Type() : DateType(Type::DATE32, DateUnit::DAY) {}
+Date32Type::Date32Type() : DateType(Type::DATE32) {}
 
-Date64Type::Date64Type() : DateType(Type::DATE64, DateUnit::MILLI) {}
+Date64Type::Date64Type() : DateType(Type::DATE64) {}
 
 std::string Date64Type::ToString() const { return std::string("date64[ms]"); }
@@ -218,6 +219,24 @@ std::string UnionType::ToString() const {
 // ----------------------------------------------------------------------
 // Struct type
 
+namespace {
+
+std::unordered_map<std::string, int> CreateNameToIndexMap(
+    const std::vector<std::shared_ptr<Field>>& fields) {
+  std::unordered_map<std::string, int> name_to_index;
+  for (size_t i = 0; i < fields.size(); ++i) {
+    name_to_index[fields[i]->name()] = static_cast<int>(i);
+  }
+  return name_to_index;
+}
+
+}  // namespace
+
+StructType::StructType(const std::vector<std::shared_ptr<Field>>& fields)
+    : NestedType(Type::STRUCT), name_to_index_(CreateNameToIndexMap(fields)) {
+  children_ = fields;
+}
+
 std::string StructType::ToString() const {
   std::stringstream s;
   s << "struct<";
@@ -232,15 +251,28 @@ std::string StructType::ToString() const {
   return s.str();
 }
 
-std::shared_ptr<Field> StructType::GetChildByName(const std::string& name) const {
-  int i = GetChildIndex(name);
+std::shared_ptr<Field> StructType::GetFieldByName(const std::string& name) const {
+  int i = GetFieldIndex(name);
   return i == -1 ? nullptr : children_[i];
 }
 
-int StructType::GetChildIndex(const std::string& name) const {
-  if (children_.size() > 0 && name_to_index_.size() == 0) {
+int StructType::GetFieldIndex(const std::string& name) const {
+  if (name_to_index_.size() < children_.size()) {
+    // There are duplicate field names. Refuse to guess
+    int counts = 0;
+    int last_observed_index = -1;
     for (size_t i = 0; i < children_.size(); ++i) {
-      name_to_index_[children_[i]->name()] = static_cast<int>(i);
+      if (children_[i]->name() == name) {
+        ++counts;
+        last_observed_index = static_cast<int>(i);
+      }
+    }
+
+    if (counts == 1) {
+      return last_observed_index;
+    } else {
+      // Duplicate or not found
+      return -1;
     }
   }
 
@@ -252,6 +284,14 @@ int StructType::GetChildIndex(const std::string& name) const {
   }
 }
 
+std::shared_ptr<Field> StructType::GetChildByName(const std::string& name) const {
+  return GetFieldByName(name);
+}
+
+int StructType::GetChildIndex(const std::string& name) const {
+  return GetFieldIndex(name);
+}
+
 // ----------------------------------------------------------------------
 // DictionaryType
 
@@ -260,7 +300,12 @@ DictionaryType::DictionaryType(const std::shared_ptr<DataType>& index_type,
     : FixedWidthType(Type::DICTIONARY),
       index_type_(index_type),
       dictionary_(dictionary),
-      ordered_(ordered) {}
+      ordered_(ordered) {
+#ifndef NDEBUG
+  const auto& int_type = checked_cast<const IntegerType&>(*index_type);
+  DCHECK_EQ(int_type.is_signed(), true) << "dictionary index type should be signed";
+#endif
+}
 
 int DictionaryType::bit_width() const {
   return checked_cast<const FixedWidthType&>(*index_type_).bit_width();
@@ -285,11 +330,15 @@ std::string NullType::ToString() const { return name(); }
 
 Schema::Schema(const std::vector<std::shared_ptr<Field>>& fields,
                const std::shared_ptr<const KeyValueMetadata>& metadata)
-    : fields_(fields), metadata_(metadata) {}
+    : fields_(fields),
+      name_to_index_(CreateNameToIndexMap(fields_)),
+      metadata_(metadata) {}
 
 Schema::Schema(std::vector<std::shared_ptr<Field>>&& fields,
               const std::shared_ptr<const KeyValueMetadata>& metadata)
-    : fields_(std::move(fields)), metadata_(metadata) {}
+    : fields_(std::move(fields)),
+      name_to_index_(CreateNameToIndexMap(fields_)),
+      metadata_(metadata) {}
 
 bool Schema::Equals(const Schema& other, bool check_metadata) const {
   if (this == &other) {
@@ -301,7 +350,7 @@ bool Schema::Equals(const Schema& other, bool check_metadata) const {
     return false;
   }
   for (int i = 0; i < num_fields(); ++i) {
-    if (!field(i)->Equals(*other.field(i).get())) {
+    if (!field(i)->Equals(*other.field(i).get(), check_metadata)) {
       return false;
     }
   }
@@ -324,12 +373,6 @@ std::shared_ptr<Field> Schema::GetFieldByName(const std::string& name) const {
 }
 
 int64_t Schema::GetFieldIndex(const std::string& name) const {
-  if (fields_.size() > 0 && name_to_index_.size() == 0) {
-    for (size_t i = 0; i < fields_.size(); ++i) {
-      name_to_index_[fields_[i]->name()] = static_cast<int>(i);
-    }
-  }
-
   auto it = name_to_index_.find(name);
   if (it == name_to_index_.end()) {
     return -1;
@@ -419,22 +462,22 @@ std::shared_ptr<Schema> schema(std::vector<std::shared_ptr<Field>>&& fields,
 
 #define ACCEPT_VISITOR(TYPE) \
   Status TYPE::Accept(TypeVisitor* visitor) const { return visitor->Visit(*this); }
 
-ACCEPT_VISITOR(NullType);
-ACCEPT_VISITOR(BooleanType);
-ACCEPT_VISITOR(BinaryType);
-ACCEPT_VISITOR(FixedSizeBinaryType);
-ACCEPT_VISITOR(StringType);
-ACCEPT_VISITOR(ListType);
-ACCEPT_VISITOR(StructType);
-ACCEPT_VISITOR(Decimal128Type);
-ACCEPT_VISITOR(UnionType);
-ACCEPT_VISITOR(Date32Type);
-ACCEPT_VISITOR(Date64Type);
-ACCEPT_VISITOR(Time32Type);
-ACCEPT_VISITOR(Time64Type);
-ACCEPT_VISITOR(TimestampType);
-ACCEPT_VISITOR(IntervalType);
-ACCEPT_VISITOR(DictionaryType);
+ACCEPT_VISITOR(NullType)
+ACCEPT_VISITOR(BooleanType)
+ACCEPT_VISITOR(BinaryType)
+ACCEPT_VISITOR(FixedSizeBinaryType)
+ACCEPT_VISITOR(StringType)
+ACCEPT_VISITOR(ListType)
+ACCEPT_VISITOR(StructType)
+ACCEPT_VISITOR(Decimal128Type)
+ACCEPT_VISITOR(UnionType)
+ACCEPT_VISITOR(Date32Type)
+ACCEPT_VISITOR(Date64Type)
+ACCEPT_VISITOR(Time32Type)
+ACCEPT_VISITOR(Time64Type)
+ACCEPT_VISITOR(TimestampType)
+ACCEPT_VISITOR(IntervalType)
+ACCEPT_VISITOR(DictionaryType)
 
 #define TYPE_FACTORY(NAME, KLASS)                        \
   std::shared_ptr<DataType> NAME() {                     \
@@ -442,23 +485,23 @@ ACCEPT_VISITOR(DictionaryType);
     return result;                                       \
   }
 
-TYPE_FACTORY(null, NullType);
-TYPE_FACTORY(boolean, BooleanType);
-TYPE_FACTORY(int8, Int8Type);
-TYPE_FACTORY(uint8, UInt8Type);
-TYPE_FACTORY(int16, Int16Type);
-TYPE_FACTORY(uint16, UInt16Type);
-TYPE_FACTORY(int32, Int32Type);
-TYPE_FACTORY(uint32, UInt32Type);
-TYPE_FACTORY(int64, Int64Type);
-TYPE_FACTORY(uint64, UInt64Type);
-TYPE_FACTORY(float16, HalfFloatType);
-TYPE_FACTORY(float32, FloatType);
-TYPE_FACTORY(float64, DoubleType);
-TYPE_FACTORY(utf8, StringType);
-TYPE_FACTORY(binary, BinaryType);
-TYPE_FACTORY(date64, Date64Type);
-TYPE_FACTORY(date32, Date32Type);
+TYPE_FACTORY(null, NullType)
+TYPE_FACTORY(boolean, BooleanType)
+TYPE_FACTORY(int8, Int8Type)
+TYPE_FACTORY(uint8, UInt8Type)
+TYPE_FACTORY(int16, Int16Type)
+TYPE_FACTORY(uint16, UInt16Type)
+TYPE_FACTORY(int32, Int32Type)
+TYPE_FACTORY(uint32, UInt32Type)
+TYPE_FACTORY(int64, Int64Type)
+TYPE_FACTORY(uint64, UInt64Type)
+TYPE_FACTORY(float16, HalfFloatType)
+TYPE_FACTORY(float32, FloatType)
+TYPE_FACTORY(float64, DoubleType)
+TYPE_FACTORY(utf8, StringType)
+TYPE_FACTORY(binary, BinaryType)
+TYPE_FACTORY(date64, Date64Type)
+TYPE_FACTORY(date32, Date32Type)
 
 std::shared_ptr<DataType> fixed_size_binary(int32_t byte_width) {
   return std::make_shared<FixedSizeBinaryType>(byte_width);
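The check_metadata flag now threaded through Field::Equals and Schema::Equals makes the semantics above easy to demonstrate. A hedged sketch (field names and metadata are invented for illustration):

// Two fields that differ only in metadata compare equal when metadata is ignored.
auto md = key_value_metadata({{"k", "v"}});
auto a = field("x", int32());
auto b = field("x", int32(), true, md);
// a->Equals(b) is false: metadata is checked by default.
// a->Equals(b, false) is true: metadata is ignored.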
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 63f0e2d237242..752fc85fb9504 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -39,12 +39,13 @@ namespace arrow {
 
 class Array;
 class Field;
+class MemoryPool;
 
-/// \brief Main data type enumeration
-///
-/// This enumeration provides a quick way to interrogate the category
-/// of a DataType instance.
 struct Type {
+  /// \brief Main data type enumeration
+  ///
+  /// This enumeration provides a quick way to interrogate the category
+  /// of a DataType instance.
   enum type {
     /// A NULL type having no physical storage
     NA,
@@ -143,18 +144,19 @@ struct Type {
 /// nested type consisting of other data types, or another data type (e.g. a
 /// timestamp encoded as an int64).
 ///
-/// Simple datatypes may be entirely described by their Type id, but
+/// Simple datatypes may be entirely described by their Type::type id, but
 /// complex datatypes are usually parametric.
 class ARROW_EXPORT DataType {
  public:
  explicit DataType(Type::type id) : id_(id) {}
  virtual ~DataType();
 
-  // Return whether the types are equal
-  //
-  // Types that are logically convertible from one to another (e.g. List
-  // and Binary) are NOT equal.
+  /// \brief Return whether the types are equal
+  ///
+  /// Types that are logically convertible from one to another (e.g. List
+  /// and Binary) are NOT equal.
   virtual bool Equals(const DataType& other) const;
 
+  /// \brief Return whether the types are equal
   bool Equals(const std::shared_ptr<DataType>& other) const;
 
   std::shared_ptr<Field> child(int i) const { return children_[i]; }
@@ -174,6 +176,7 @@ class ARROW_EXPORT DataType {
   /// \since 0.7.0
   virtual std::string name() const = 0;
 
+  /// \brief Return the type category
   Type::type id() const { return id_; }
 
  protected:
@@ -248,23 +251,31 @@ class ARROW_EXPORT Field {
       const std::shared_ptr<const KeyValueMetadata>& metadata = NULLPTR)
       : name_(name), type_(type), nullable_(nullable), metadata_(metadata) {}
 
+  /// \brief Return the field's attached metadata
   std::shared_ptr<const KeyValueMetadata> metadata() const { return metadata_; }
 
+  /// \brief Return whether the field has non-empty metadata
   bool HasMetadata() const;
 
+  /// \brief Return a copy of this field with the given metadata attached to it
   std::shared_ptr<Field> AddMetadata(
       const std::shared_ptr<const KeyValueMetadata>& metadata) const;
 
+  /// \brief Return a copy of this field without any metadata attached to it
   std::shared_ptr<Field> RemoveMetadata() const;
 
   std::vector<std::shared_ptr<Field>> Flatten() const;
 
-  bool Equals(const Field& other) const;
-  bool Equals(const std::shared_ptr<Field>& other) const;
+  bool Equals(const Field& other, bool check_metadata = true) const;
+  bool Equals(const std::shared_ptr<Field>& other, bool check_metadata = true) const;
 
+  /// \brief Return a string representation of the field
   std::string ToString() const;
 
+  /// \brief Return the field name
   const std::string& name() const { return name_; }
+  /// \brief Return the field data type
   std::shared_ptr<DataType> type() const { return type_; }
+  /// \brief Return whether the field is nullable
   bool nullable() const { return nullable_; }
 
  private:
@@ -495,24 +506,27 @@ class ARROW_EXPORT StructType : public NestedType {
  public:
   static constexpr Type::type type_id = Type::STRUCT;
 
-  explicit StructType(const std::vector<std::shared_ptr<Field>>& fields)
-      : NestedType(Type::STRUCT) {
-    children_ = fields;
-  }
+  explicit StructType(const std::vector<std::shared_ptr<Field>>& fields);
 
   Status Accept(TypeVisitor* visitor) const override;
   std::string ToString() const override;
   std::string name() const override { return "struct"; }
 
   /// Returns null if name not found
+  std::shared_ptr<Field> GetFieldByName(const std::string& name) const;
+
+  /// Returns -1 if name not found or if there are multiple fields having the
+  /// same name
+  int GetFieldIndex(const std::string& name) const;
+
+  ARROW_DEPRECATED("Use GetFieldByName")
   std::shared_ptr<Field> GetChildByName(const std::string& name) const;
 
-  /// Returns -1 if name not found
+  ARROW_DEPRECATED("Use GetFieldIndex")
   int GetChildIndex(const std::string& name) const;
 
 private:
-  /// Lazily initialized mapping
-  mutable std::unordered_map<std::string, int> name_to_index_;
+  std::unordered_map<std::string, int> name_to_index_;
 };
 
 /// \brief Base type class for (fixed-size) decimal data
@@ -582,17 +596,17 @@ enum class DateUnit : char { DAY = 0, MILLI = 1 };
 
 /// \brief Base type class for date data
 class ARROW_EXPORT DateType : public FixedWidthType {
  public:
-  DateUnit unit() const { return unit_; }
+  virtual DateUnit unit() const = 0;
 
 protected:
-  DateType(Type::type type_id, DateUnit unit);
-  DateUnit unit_;
+  explicit DateType(Type::type type_id);
 };
 
 /// Concrete type class for 32-bit date data (as number of days since UNIX epoch)
 class ARROW_EXPORT Date32Type : public DateType {
  public:
  static constexpr Type::type type_id = Type::DATE32;
+  static constexpr DateUnit UNIT = DateUnit::DAY;
 
   using c_type = int32_t;
 
@@ -604,12 +618,14 @@ class ARROW_EXPORT Date32Type : public DateType {
 
   std::string ToString() const override;
   std::string name() const override { return "date32"; }
+  DateUnit unit() const override { return UNIT; }
 };
 
 /// Concrete type class for 64-bit date data (as number of milliseconds since UNIX epoch)
 class ARROW_EXPORT Date64Type : public DateType {
  public:
   static constexpr Type::type type_id = Type::DATE64;
+  static constexpr DateUnit UNIT = DateUnit::MILLI;
 
   using c_type = int64_t;
 
@@ -621,9 +637,11 @@ class ARROW_EXPORT Date64Type : public DateType {
 
   std::string ToString() const override;
   std::string name() const override { return "date64"; }
+  DateUnit unit() const override { return UNIT; }
 };
 
 struct TimeUnit {
+  /// The unit for a time or timestamp DataType
   enum type { SECOND = 0, MILLI = 1, MICRO = 2, NANO = 3 };
 };
 
@@ -757,6 +775,23 @@ class ARROW_EXPORT DictionaryType : public FixedWidthType {
 
   bool ordered() const { return ordered_; }
 
+  /// \brief Unify several dictionary types
+  ///
+  /// Compute a resulting dictionary that will allow the union of values
+  /// of all input dictionary types. The input types must all have the
+  /// same value type.
+  /// \param[in] pool Memory pool to allocate dictionary values from
+  /// \param[in] types A sequence of input dictionary types
+  /// \param[out] out_type The unified dictionary type
+  /// \param[out] out_transpose_maps (optionally) A sequence of integer vectors,
+  ///     one per input type. Each integer vector represents the transposition
+  ///     of input type indices into unified type indices.
+  // XXX Should we return something special (an empty transpose map?) when
+  // the transposition is the identity function?
+  static Status Unify(MemoryPool* pool, const std::vector<const DataType*>& types,
+                      std::shared_ptr<DataType>* out_type,
+                      std::vector<std::vector<int32_t>>* out_transpose_maps = NULLPTR);
+
 private:
  // Must be an integer type (not currently checked)
  std::shared_ptr<DataType> index_type_;
@@ -827,8 +862,7 @@ class ARROW_EXPORT Schema {
 
 private:
  std::vector<std::shared_ptr<Field>> fields_;
-  /// Lazily initialized mapping
-  mutable std::unordered_map<std::string, int> name_to_index_;
+  std::unordered_map<std::string, int> name_to_index_;
 
   std::shared_ptr<const KeyValueMetadata> metadata_;
 };
@@ -837,6 +871,9 @@ class ARROW_EXPORT Schema {
 // Parametric factory functions
 // Other factory functions are in type_fwd.h
 
+/// \addtogroup type-factories
+/// @{
+
 /// \brief Create a FixedSizeBinaryType instance
 ARROW_EXPORT
 std::shared_ptr<DataType> fixed_size_binary(int32_t byte_width);
@@ -890,6 +927,13 @@ std::shared_ptr<DataType> ARROW_EXPORT
 dictionary(const std::shared_ptr<DataType>& index_type,
            const std::shared_ptr<Array>& values, bool ordered = false);
 
+/// @}
+
+/// \defgroup schema-factories Factory functions for fields and schemas
+///
+/// Factory functions for fields and schemas
+/// @{
+
 /// \brief Create a Field instance
 ///
 /// \param name the field name
@@ -920,6 +964,8 @@ std::shared_ptr<Schema> schema(
     std::vector<std::shared_ptr<Field>>&& fields,
     const std::shared_ptr<const KeyValueMetadata>& metadata = NULLPTR);
 
+/// @}
+
 }  // namespace arrow
 
 #endif  // ARROW_TYPE_H
diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h
index dbbe7092b4f12..2593a4f7e9947 100644
--- a/cpp/src/arrow/type_fwd.h
+++ b/cpp/src/arrow/type_fwd.h
@@ -96,17 +96,17 @@ class NumericTensor;
   using KLASS##Builder = NumericBuilder<KLASS##Type>; \
   using KLASS##Tensor = NumericTensor<KLASS##Type>;
 
-_NUMERIC_TYPE_DECL(Int8);
-_NUMERIC_TYPE_DECL(Int16);
-_NUMERIC_TYPE_DECL(Int32);
-_NUMERIC_TYPE_DECL(Int64);
-_NUMERIC_TYPE_DECL(UInt8);
-_NUMERIC_TYPE_DECL(UInt16);
-_NUMERIC_TYPE_DECL(UInt32);
-_NUMERIC_TYPE_DECL(UInt64);
-_NUMERIC_TYPE_DECL(HalfFloat);
-_NUMERIC_TYPE_DECL(Float);
-_NUMERIC_TYPE_DECL(Double);
+_NUMERIC_TYPE_DECL(Int8)
+_NUMERIC_TYPE_DECL(Int16)
+_NUMERIC_TYPE_DECL(Int32)
+_NUMERIC_TYPE_DECL(Int64)
+_NUMERIC_TYPE_DECL(UInt8)
+_NUMERIC_TYPE_DECL(UInt16)
+_NUMERIC_TYPE_DECL(UInt32)
+_NUMERIC_TYPE_DECL(UInt64)
+_NUMERIC_TYPE_DECL(HalfFloat)
+_NUMERIC_TYPE_DECL(Float)
+_NUMERIC_TYPE_DECL(Double)
 
 #undef _NUMERIC_TYPE_DECL
 
@@ -137,6 +137,11 @@ using IntervalArray = NumericArray<IntervalType>;
 
 // (parameter-free) Factory functions
 // Other factory functions are in type.h
 
+/// \defgroup type-factories Factory functions for creating data types
+///
+/// Factory functions for creating data types
+/// @{
+
 /// \brief Return a NullType instance
 std::shared_ptr<DataType> ARROW_EXPORT null();
 /// \brief Return a BooleanType instance
@@ -172,6 +177,8 @@ std::shared_ptr<DataType> ARROW_EXPORT date32();
 /// \brief Return a Date64Type instance
 std::shared_ptr<DataType> ARROW_EXPORT date64();
 
+/// @}
+
 }  // namespace arrow
 
 #endif  // ARROW_TYPE_FWD_H
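The CTypeTraits machinery introduced in the next file inverts TypeTraits: instead of asking an Arrow type for its C type, you ask a C type for its Arrow type. A sketch of the round trips it enables (mirroring the static_asserts in the PRIMITIVE_TEST macro above; the std::vector case relies on the list specialization shown below):

// Compile-time: C type <-> Arrow type in both directions.
static_assert(std::is_same<CTypeTraits<int64_t>::ArrowType, Int64Type>::value, "");
static_assert(std::is_same<TypeTraits<Int64Type>::CType, int64_t>::value, "");

// Runtime: a C++ container maps to the corresponding list type.
std::shared_ptr<DataType> ty = CTypeTraits<std::vector<int32_t>>::type_singleton();
// ty->Equals(list(int32())) should hold.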
diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h
index da5cf25f5eed1..fd1d52a370f24 100644
--- a/cpp/src/arrow/type_traits.h
+++ b/cpp/src/arrow/type_traits.h
@@ -19,7 +19,9 @@
 #define ARROW_TYPE_TRAITS_H
 
 #include
+#include
 #include
+#include
 
 #include "arrow/type_fwd.h"
 #include "arrow/util/bit-util.h"
@@ -33,6 +35,9 @@ namespace arrow {
 
 template <typename T>
 struct TypeTraits {};
 
+template <typename CType>
+struct CTypeTraits {};
+
 template <>
 struct TypeTraits<NullType> {
   using ArrayType = NullArray;
@@ -41,109 +46,68 @@ struct TypeTraits<NullType> {
 };
 
 template <>
-struct TypeTraits<UInt8Type> {
-  using ArrayType = UInt8Array;
-  using BuilderType = UInt8Builder;
-  using TensorType = UInt8Tensor;
-  static inline int64_t bytes_required(int64_t elements) { return elements; }
-  constexpr static bool is_parameter_free = true;
-  static inline std::shared_ptr<DataType> type_singleton() { return uint8(); }
-};
-
-template <>
-struct TypeTraits<Int8Type> {
-  using ArrayType = Int8Array;
-  using BuilderType = Int8Builder;
-  using TensorType = Int8Tensor;
-  static inline int64_t bytes_required(int64_t elements) { return elements; }
-  constexpr static bool is_parameter_free = true;
-  static inline std::shared_ptr<DataType> type_singleton() { return int8(); }
-};
-
-template <>
-struct TypeTraits<UInt16Type> {
-  using ArrayType = UInt16Array;
-  using BuilderType = UInt16Builder;
-  using TensorType = UInt16Tensor;
-
-  static inline int64_t bytes_required(int64_t elements) {
-    return elements * sizeof(uint16_t);
-  }
-  constexpr static bool is_parameter_free = true;
-  static inline std::shared_ptr<DataType> type_singleton() { return uint16(); }
-};
-
-template <>
-struct TypeTraits<Int16Type> {
-  using ArrayType = Int16Array;
-  using BuilderType = Int16Builder;
-  using TensorType = Int16Tensor;
-
-  static inline int64_t bytes_required(int64_t elements) {
-    return elements * sizeof(int16_t);
-  }
-  constexpr static bool is_parameter_free = true;
-  static inline std::shared_ptr<DataType> type_singleton() { return int16(); }
-};
-
-template <>
-struct TypeTraits<UInt32Type> {
-  using ArrayType = UInt32Array;
-  using BuilderType = UInt32Builder;
-  using TensorType = UInt32Tensor;
-
-  static inline int64_t bytes_required(int64_t elements) {
-    return elements * sizeof(uint32_t);
-  }
-  constexpr static bool is_parameter_free = true;
-  static inline std::shared_ptr<DataType> type_singleton() { return uint32(); }
-};
-
-template <>
-struct TypeTraits<Int32Type> {
-  using ArrayType = Int32Array;
-  using BuilderType = Int32Builder;
-  using TensorType = Int32Tensor;
-
-  static inline int64_t bytes_required(int64_t elements) {
-    return elements * sizeof(int32_t);
-  }
-  constexpr static bool is_parameter_free = true;
-  static inline std::shared_ptr<DataType> type_singleton() { return int32(); }
-};
-
-template <>
-struct TypeTraits<UInt64Type> {
-  using ArrayType = UInt64Array;
-  using BuilderType = UInt64Builder;
-  using TensorType = UInt64Tensor;
+struct TypeTraits<BooleanType> {
+  using ArrayType = BooleanArray;
+  using BuilderType = BooleanBuilder;
+  using CType = bool;
 
-  static inline int64_t bytes_required(int64_t elements) {
-    return elements * sizeof(uint64_t);
+  static constexpr int64_t bytes_required(int64_t elements) {
+    return BitUtil::BytesForBits(elements);
   }
   constexpr static bool is_parameter_free = true;
-  static inline std::shared_ptr<DataType> type_singleton() { return uint64(); }
+  static inline std::shared_ptr<DataType> type_singleton() { return boolean(); }
 };
 
 template <>
-struct TypeTraits<Int64Type> {
-  using ArrayType = Int64Array;
-  using BuilderType = Int64Builder;
-  using TensorType = Int64Tensor;
+struct CTypeTraits<bool> : public TypeTraits<BooleanType> {
+  using ArrowType = BooleanType;
+};
+
+#define PRIMITIVE_TYPE_TRAITS_DEF_(CType_, ArrowType_, ArrowArrayType, ArrowBuilderType, \
+                                   ArrowTensorType, SingletonFn)                         \
+  template <>                                                                            \
+  struct TypeTraits<ArrowType_> {                                                        \
+    using ArrayType = ArrowArrayType;                                                    \
+    using BuilderType = ArrowBuilderType;                                                \
+    using TensorType = ArrowTensorType;                                                  \
+    using CType = CType_;                                                                \
+    static constexpr int64_t bytes_required(int64_t elements) {                          \
+      return elements * sizeof(CType_);                                                  \
+    }                                                                                    \
+    constexpr static bool is_parameter_free = true;                                      \
+    static inline std::shared_ptr<DataType> type_singleton() { return SingletonFn(); }   \
+  };                                                                                     \
+                                                                                         \
+  template <>                                                                            \
+  struct CTypeTraits<CType_> : public TypeTraits<ArrowType_> {                           \
+    using ArrowType = ArrowType_;                                                        \
+  };
 
-  static inline int64_t bytes_required(int64_t elements) {
-    return elements * sizeof(int64_t);
-  }
-  constexpr static bool is_parameter_free = true;
-  static inline std::shared_ptr<DataType> type_singleton() { return int64(); }
-};
+#define PRIMITIVE_TYPE_TRAITS_DEF(CType, ArrowShort, SingletonFn)                 \
+  PRIMITIVE_TYPE_TRAITS_DEF_(                                                     \
+      CType, ARROW_CONCAT(ArrowShort, Type), ARROW_CONCAT(ArrowShort, Array),     \
+      ARROW_CONCAT(ArrowShort, Builder), ARROW_CONCAT(ArrowShort, Tensor), SingletonFn)
+
+PRIMITIVE_TYPE_TRAITS_DEF(uint8_t, UInt8, uint8)
+PRIMITIVE_TYPE_TRAITS_DEF(int8_t, Int8, int8)
+PRIMITIVE_TYPE_TRAITS_DEF(uint16_t, UInt16, uint16)
+PRIMITIVE_TYPE_TRAITS_DEF(int16_t, Int16, int16)
+PRIMITIVE_TYPE_TRAITS_DEF(uint32_t, UInt32, uint32)
+PRIMITIVE_TYPE_TRAITS_DEF(int32_t, Int32, int32)
+PRIMITIVE_TYPE_TRAITS_DEF(uint64_t, UInt64, uint64)
+PRIMITIVE_TYPE_TRAITS_DEF(int64_t, Int64, int64)
+PRIMITIVE_TYPE_TRAITS_DEF(float, Float, float32)
+PRIMITIVE_TYPE_TRAITS_DEF(double, Double, float64)
+
+#undef PRIMITIVE_TYPE_TRAITS_DEF
+#undef PRIMITIVE_TYPE_TRAITS_DEF_
 
 template <>
 struct TypeTraits<Date64Type> {
   using ArrayType = Date64Array;
   using BuilderType = Date64Builder;
 
-  static inline int64_t bytes_required(int64_t elements) {
+  static constexpr int64_t bytes_required(int64_t elements) {
     return elements * sizeof(int64_t);
   }
   constexpr static bool is_parameter_free = true;
@@ -155,7 +119,7 @@ struct TypeTraits<Date32Type> {
   using ArrayType = Date32Array;
   using BuilderType = Date32Builder;
 
-  static inline int64_t bytes_required(int64_t elements) {
+  static constexpr int64_t bytes_required(int64_t elements) {
     return elements * sizeof(int32_t);
   }
   constexpr static bool is_parameter_free = true;
@@ -167,7 +131,7 @@ struct TypeTraits<TimestampType> {
   using ArrayType = TimestampArray;
   using BuilderType = TimestampBuilder;
 
-  static inline int64_t bytes_required(int64_t elements) {
+  static constexpr int64_t bytes_required(int64_t elements) {
     return elements * sizeof(int64_t);
   }
   constexpr static bool is_parameter_free = false;
@@ -178,7 +142,7 @@ struct TypeTraits<Time32Type> {
   using ArrayType = Time32Array;
   using BuilderType = Time32Builder;
 
-  static inline int64_t bytes_required(int64_t elements) {
+  static constexpr int64_t bytes_required(int64_t elements) {
     return elements * sizeof(int32_t);
   }
   constexpr static bool is_parameter_free = false;
@@ -189,7 +153,7 @@ struct TypeTraits<Time64Type> {
   using ArrayType = Time64Array;
   using BuilderType = Time64Builder;
 
-  static inline int64_t bytes_required(int64_t elements) {
+  static constexpr int64_t bytes_required(int64_t elements) {
     return elements * sizeof(int64_t);
   }
   constexpr static bool is_parameter_free = false;
@@ -201,39 +165,13 @@ struct TypeTraits<HalfFloatType> {
   using BuilderType = HalfFloatBuilder;
   using TensorType = HalfFloatTensor;
 
-  static inline int64_t bytes_required(int64_t elements) {
+  static constexpr int64_t bytes_required(int64_t elements) {
     return elements * sizeof(uint16_t);
   }
   constexpr static bool is_parameter_free = true;
   static inline std::shared_ptr<DataType> type_singleton() { return float16(); }
 };
 
-template <>
-struct TypeTraits<FloatType> {
-  using ArrayType = FloatArray;
-  using BuilderType = FloatBuilder;
-  using TensorType = FloatTensor;
-
-  static inline int64_t bytes_required(int64_t elements) {
-    return static_cast<int64_t>(elements * sizeof(float));
-  }
-  constexpr static bool is_parameter_free = true;
-  static inline std::shared_ptr<DataType> type_singleton() { return float32(); }
-};
-
-template <>
-struct TypeTraits<DoubleType> {
-  using ArrayType = DoubleArray;
-  using BuilderType = DoubleBuilder;
-  using TensorType = DoubleTensor;
-
-  static inline int64_t bytes_required(int64_t elements) {
-    return static_cast<int64_t>(elements * sizeof(double));
-  }
-  constexpr static bool is_parameter_free = true;
-  static inline std::shared_ptr<DataType> type_singleton() { return float64(); }
-};
-
 template <>
 struct TypeTraits<Decimal128Type> {
   using ArrayType = Decimal128Array;
@@ -241,18 +179,6 @@ struct TypeTraits<Decimal128Type> {
   constexpr static bool is_parameter_free = false;
 };
 
-template <>
-struct TypeTraits<BooleanType> {
-  using ArrayType = BooleanArray;
-  using BuilderType = BooleanBuilder;
-
-  static inline int64_t bytes_required(int64_t elements) {
-    return BitUtil::BytesForBits(elements);
-  }
-  constexpr static bool is_parameter_free = true;
-  static inline std::shared_ptr<DataType> type_singleton() { return boolean(); }
-};
-
 template <>
 struct TypeTraits<StringType> {
   using ArrayType = StringArray;
@@ -261,6 +187,16 @@ struct TypeTraits<StringType> {
   static inline std::shared_ptr<DataType> type_singleton() { return utf8(); }
 };
 
+template <>
+struct CTypeTraits<std::string> : public TypeTraits<StringType> {
+  using ArrowType = StringType;
+};
+
+template <>
+struct CTypeTraits<const char*> : public TypeTraits<StringType> {
+  using ArrowType = StringType;
+};
+
 template <>
 struct TypeTraits<BinaryType> {
   using ArrayType = BinaryArray;
@@ -283,6 +219,15 @@ struct TypeTraits<FixedSizeBinaryType> {
   constexpr static bool is_parameter_free = false;
 };
 
+template <typename CType>
+struct CTypeTraits<std::vector<CType>> : public TypeTraits<ListType> {
+  using ArrowType = ListType;
+
+  static inline std::shared_ptr<DataType> type_singleton() {
+    return list(CTypeTraits<CType>::type_singleton());
+  }
+};
+
 template <>
 struct TypeTraits<StructType> {
   using ArrayType = StructArray;
@@ -371,6 +316,11 @@ template <typename T>
 using enable_if_boolean =
     typename std::enable_if<std::is_same<BooleanType, T>::value>::type;
 
+template <typename T>
+using enable_if_binary_like =
+    typename std::enable_if<std::is_base_of<BinaryType, T>::value ||
+                            std::is_base_of<FixedSizeBinaryType, T>::value>::type;
+
 template <typename T>
 using enable_if_fixed_size_binary =
    typename std::enable_if<std::is_base_of<FixedSizeBinaryType, T>::value>::type;
@@ -401,8 +351,8 @@
 struct as_void {
    using type = typename T::ATTR_NAME; \
   };
 
-GET_ATTR(c_type, void);
-GET_ATTR(TypeClass, void);
+GET_ATTR(c_type, void)
+GET_ATTR(TypeClass, void)
 
 #undef GET_ATTR
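The switch from inline to constexpr on bytes_required means buffer sizes can be checked at compile time. A small sketch, assuming BitUtil::BytesForBits is itself constexpr (otherwise the boolean assert would have to become a runtime check):

// Compile-time buffer sizing with the constexpr bytes_required above.
static_assert(TypeTraits<Int32Type>::bytes_required(100) == 400, "");
static_assert(TypeTraits<BooleanType>::bytes_required(100) == 13, "");  // bit-packed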
diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index 6b9c3590b44dc..fefc8d6da8098 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -20,45 +20,7 @@
 #######################################
 # Headers: top level
 
-install(FILES
-  bit-stream-utils.h
-  bit-util.h
-  bpacking.h
-  checked_cast.h
-  compiler-util.h
-  compression.h
-  compression_brotli.h
-  compression_bz2.h
-  compression_lz4.h
-  compression_snappy.h
-  compression_zlib.h
-  compression_zstd.h
-  cpu-info.h
-  date.h
-  decimal.h
-  hash-util.h
-  hashing.h
-  io-util.h
-  key_value_metadata.h
-  lazy.h
-  logging.h
-  macros.h
-  memory.h
-  neon-util.h
-  parallel.h
-  rle-encoding.h
-  sse-util.h
-  stl.h
-  stopwatch.h
-  string.h
-  string_view.h
-  thread-pool.h
-  type_traits.h
-  utf8.h
-  variant.h
-  visibility.h
-  windows_compatibility.h
-  DESTINATION include/arrow/util)
+ARROW_INSTALL_ALL_HEADERS("arrow/util")
 
 #######################################
 # arrow_test_main
@@ -68,22 +30,22 @@ if (ARROW_BUILD_BENCHMARKS)
   add_library(arrow_benchmark_main benchmark_main.cc)
   if (APPLE)
     target_link_libraries(arrow_benchmark_main
-      benchmark_static
+      gbenchmark_static
     )
   elseif(MSVC)
     target_link_libraries(arrow_benchmark_main
-      benchmark_static
+      gbenchmark_static
       Shlwapi.lib
     )
   else()
     target_link_libraries(arrow_benchmark_main
-      benchmark_static
+      gbenchmark_static
       pthread
     )
   endif()
 
   # TODO(wesm): Some benchmarks include gtest.h
-  add_dependencies(arrow_benchmark_main gtest_static)
+  add_dependencies(arrow_benchmark_main ${GTEST_LIBRARY})
 endif()
 
 ADD_ARROW_TEST(bit-util-test)
@@ -100,6 +62,7 @@ ADD_ARROW_TEST(rle-encoding-test)
 ADD_ARROW_TEST(stl-util-test)
 ADD_ARROW_TEST(task-group-test)
 ADD_ARROW_TEST(thread-pool-test)
+ADD_ARROW_TEST(trie-test)
 ADD_ARROW_TEST(utf8-util-test)
 
 ADD_ARROW_BENCHMARK(bit-util-benchmark)
@@ -108,8 +71,8 @@ ADD_ARROW_BENCHMARK(decimal-benchmark)
 ADD_ARROW_BENCHMARK(hashing-benchmark)
 ADD_ARROW_BENCHMARK(int-util-benchmark)
 ADD_ARROW_BENCHMARK(lazy-benchmark)
+ADD_ARROW_BENCHMARK(machine-benchmark)
 ADD_ARROW_BENCHMARK(number-parsing-benchmark)
+ADD_ARROW_BENCHMARK(thread-pool-benchmark)
+ADD_ARROW_BENCHMARK(trie-benchmark)
 ADD_ARROW_BENCHMARK(utf8-util-benchmark)
-
-add_subdirectory(string_view)
-add_subdirectory(variant)
diff --git a/cpp/src/arrow/util/basic_decimal.cc b/cpp/src/arrow/util/basic_decimal.cc
new file mode 100644
index 0000000000000..bb235f4016619
--- /dev/null
+++ b/cpp/src/arrow/util/basic_decimal.cc
@@ -0,0 +1,690 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+ +#include "arrow/util/basic_decimal.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "arrow/util/bit-util.h" +#include "arrow/util/int-util.h" +#include "arrow/util/logging.h" +#include "arrow/util/macros.h" + +namespace arrow { + +using internal::SafeLeftShift; +using internal::SafeSignedAdd; + +static const BasicDecimal128 ScaleMultipliers[] = { + BasicDecimal128(1LL), + BasicDecimal128(10LL), + BasicDecimal128(100LL), + BasicDecimal128(1000LL), + BasicDecimal128(10000LL), + BasicDecimal128(100000LL), + BasicDecimal128(1000000LL), + BasicDecimal128(10000000LL), + BasicDecimal128(100000000LL), + BasicDecimal128(1000000000LL), + BasicDecimal128(10000000000LL), + BasicDecimal128(100000000000LL), + BasicDecimal128(1000000000000LL), + BasicDecimal128(10000000000000LL), + BasicDecimal128(100000000000000LL), + BasicDecimal128(1000000000000000LL), + BasicDecimal128(10000000000000000LL), + BasicDecimal128(100000000000000000LL), + BasicDecimal128(1000000000000000000LL), + BasicDecimal128(0LL, 10000000000000000000ULL), + BasicDecimal128(5LL, 7766279631452241920ULL), + BasicDecimal128(54LL, 3875820019684212736ULL), + BasicDecimal128(542LL, 1864712049423024128ULL), + BasicDecimal128(5421LL, 200376420520689664ULL), + BasicDecimal128(54210LL, 2003764205206896640ULL), + BasicDecimal128(542101LL, 1590897978359414784ULL), + BasicDecimal128(5421010LL, 15908979783594147840ULL), + BasicDecimal128(54210108LL, 11515845246265065472ULL), + BasicDecimal128(542101086LL, 4477988020393345024ULL), + BasicDecimal128(5421010862LL, 7886392056514347008ULL), + BasicDecimal128(54210108624LL, 5076944270305263616ULL), + BasicDecimal128(542101086242LL, 13875954555633532928ULL), + BasicDecimal128(5421010862427LL, 9632337040368467968ULL), + BasicDecimal128(54210108624275LL, 4089650035136921600ULL), + BasicDecimal128(542101086242752LL, 4003012203950112768ULL), + BasicDecimal128(5421010862427522LL, 3136633892082024448ULL), + BasicDecimal128(54210108624275221LL, 12919594847110692864ULL), + BasicDecimal128(542101086242752217LL, 68739955140067328ULL), + BasicDecimal128(5421010862427522170LL, 687399551400673280ULL)}; + +static const BasicDecimal128 ScaleMultipliersHalf[] = { + BasicDecimal128(0ULL), + BasicDecimal128(5ULL), + BasicDecimal128(50ULL), + BasicDecimal128(500ULL), + BasicDecimal128(5000ULL), + BasicDecimal128(50000ULL), + BasicDecimal128(500000ULL), + BasicDecimal128(5000000ULL), + BasicDecimal128(50000000ULL), + BasicDecimal128(500000000ULL), + BasicDecimal128(5000000000ULL), + BasicDecimal128(50000000000ULL), + BasicDecimal128(500000000000ULL), + BasicDecimal128(5000000000000ULL), + BasicDecimal128(50000000000000ULL), + BasicDecimal128(500000000000000ULL), + BasicDecimal128(5000000000000000ULL), + BasicDecimal128(50000000000000000ULL), + BasicDecimal128(500000000000000000ULL), + BasicDecimal128(5000000000000000000ULL), + BasicDecimal128(2LL, 13106511852580896768ULL), + BasicDecimal128(27LL, 1937910009842106368ULL), + BasicDecimal128(271LL, 932356024711512064ULL), + BasicDecimal128(2710LL, 9323560247115120640ULL), + BasicDecimal128(27105LL, 1001882102603448320ULL), + BasicDecimal128(271050LL, 10018821026034483200ULL), + BasicDecimal128(2710505LL, 7954489891797073920ULL), + BasicDecimal128(27105054LL, 5757922623132532736ULL), + BasicDecimal128(271050543LL, 2238994010196672512ULL), + BasicDecimal128(2710505431LL, 3943196028257173504ULL), + BasicDecimal128(27105054312LL, 2538472135152631808ULL), + BasicDecimal128(271050543121LL, 6937977277816766464ULL), + 
+    BasicDecimal128(2710505431213LL, 14039540557039009792ULL),
+    BasicDecimal128(27105054312137LL, 11268197054423236608ULL),
+    BasicDecimal128(271050543121376LL, 2001506101975056384ULL),
+    BasicDecimal128(2710505431213761LL, 1568316946041012224ULL),
+    BasicDecimal128(27105054312137610LL, 15683169460410122240ULL),
+    BasicDecimal128(271050543121376108LL, 9257742014424809472ULL),
+    BasicDecimal128(2710505431213761085LL, 343699775700336640ULL)};
+
+static constexpr uint64_t kIntMask = 0xFFFFFFFF;
+static constexpr auto kCarryBit = static_cast<uint64_t>(1) << static_cast<uint64_t>(32);
+
+BasicDecimal128::BasicDecimal128(const uint8_t* bytes)
+    : BasicDecimal128(
+          BitUtil::FromLittleEndian(reinterpret_cast<const int64_t*>(bytes)[1]),
+          BitUtil::FromLittleEndian(reinterpret_cast<const uint64_t*>(bytes)[0])) {}
+
+std::array<uint8_t, 16> BasicDecimal128::ToBytes() const {
+  std::array<uint8_t, 16> out{{0}};
+  ToBytes(out.data());
+  return out;
+}
+
+void BasicDecimal128::ToBytes(uint8_t* out) const {
+  DCHECK_NE(out, nullptr);
+  reinterpret_cast<uint64_t*>(out)[0] = BitUtil::ToLittleEndian(low_bits_);
+  reinterpret_cast<int64_t*>(out)[1] = BitUtil::ToLittleEndian(high_bits_);
+}
+
+BasicDecimal128& BasicDecimal128::Negate() {
+  low_bits_ = ~low_bits_ + 1;
+  high_bits_ = ~high_bits_;
+  if (low_bits_ == 0) {
+    high_bits_ = SafeSignedAdd<int64_t>(high_bits_, 1);
+  }
+  return *this;
+}
+
+BasicDecimal128& BasicDecimal128::Abs() { return *this < 0 ? Negate() : *this; }
+
+BasicDecimal128& BasicDecimal128::operator+=(const BasicDecimal128& right) {
+  const uint64_t sum = low_bits_ + right.low_bits_;
+  high_bits_ = SafeSignedAdd(high_bits_, right.high_bits_);
+  if (sum < low_bits_) {
+    high_bits_ = SafeSignedAdd<int64_t>(high_bits_, 1);
+  }
+  low_bits_ = sum;
+  return *this;
+}
+
+BasicDecimal128& BasicDecimal128::operator-=(const BasicDecimal128& right) {
+  const uint64_t diff = low_bits_ - right.low_bits_;
+  high_bits_ -= right.high_bits_;
+  if (diff > low_bits_) {
+    --high_bits_;
+  }
+  low_bits_ = diff;
+  return *this;
+}
+
+BasicDecimal128& BasicDecimal128::operator/=(const BasicDecimal128& right) {
+  BasicDecimal128 remainder;
+  auto s = Divide(right, this, &remainder);
+  DCHECK_EQ(s, DecimalStatus::kSuccess);
+  return *this;
+}
+
+BasicDecimal128& BasicDecimal128::operator|=(const BasicDecimal128& right) {
+  low_bits_ |= right.low_bits_;
+  high_bits_ |= right.high_bits_;
+  return *this;
+}
+
+BasicDecimal128& BasicDecimal128::operator&=(const BasicDecimal128& right) {
+  low_bits_ &= right.low_bits_;
+  high_bits_ &= right.high_bits_;
+  return *this;
+}
+
+BasicDecimal128& BasicDecimal128::operator<<=(uint32_t bits) {
+  if (bits != 0) {
+    if (bits < 64) {
+      high_bits_ = SafeLeftShift(high_bits_, bits);
+      high_bits_ |= (low_bits_ >> (64 - bits));
+      low_bits_ <<= bits;
+    } else if (bits < 128) {
+      high_bits_ = static_cast<int64_t>(low_bits_) << (bits - 64);
+      low_bits_ = 0;
+    } else {
+      high_bits_ = 0;
+      low_bits_ = 0;
+    }
+  }
+  return *this;
+}
+
+BasicDecimal128& BasicDecimal128::operator>>=(uint32_t bits) {
+  if (bits != 0) {
+    if (bits < 64) {
+      low_bits_ >>= bits;
+      low_bits_ |= static_cast<uint64_t>(high_bits_ << (64 - bits));
+      high_bits_ = static_cast<int64_t>(static_cast<uint64_t>(high_bits_) >> bits);
+    } else if (bits < 128) {
+      low_bits_ = static_cast<uint64_t>(high_bits_ >> (bits - 64));
+      high_bits_ = static_cast<int64_t>(high_bits_ >= 0L ? 0L : -1L);
+    } else {
+      high_bits_ = static_cast<int64_t>(high_bits_ >= 0L ?
0L : -1L); + low_bits_ = static_cast(high_bits_); + } + } + return *this; +} + +BasicDecimal128& BasicDecimal128::operator*=(const BasicDecimal128& right) { + // Break the left and right numbers into 32 bit chunks + // so that we can multiply them without overflow. + const uint64_t L0 = static_cast(high_bits_) >> 32; + const uint64_t L1 = static_cast(high_bits_) & kIntMask; + const uint64_t L2 = low_bits_ >> 32; + const uint64_t L3 = low_bits_ & kIntMask; + + const uint64_t R0 = static_cast(right.high_bits_) >> 32; + const uint64_t R1 = static_cast(right.high_bits_) & kIntMask; + const uint64_t R2 = right.low_bits_ >> 32; + const uint64_t R3 = right.low_bits_ & kIntMask; + + uint64_t product = L3 * R3; + low_bits_ = product & kIntMask; + + uint64_t sum = product >> 32; + + product = L2 * R3; + sum += product; + + product = L3 * R2; + sum += product; + + low_bits_ += sum << 32; + + high_bits_ = static_cast(sum < product ? kCarryBit : 0); + if (sum < product) { + high_bits_ += kCarryBit; + } + + high_bits_ += static_cast(sum >> 32); + high_bits_ += L1 * R3 + L2 * R2 + L3 * R1; + high_bits_ += (L0 * R3 + L1 * R2 + L2 * R1 + L3 * R0) << 32; + return *this; +} + +/// Expands the given value into an array of ints so that we can work on +/// it. The array will be converted to an absolute value and the wasNegative +/// flag will be set appropriately. The array will remove leading zeros from +/// the value. +/// \param array an array of length 4 to set with the value +/// \param was_negative a flag for whether the value was original negative +/// \result the output length of the array +static int64_t FillInArray(const BasicDecimal128& value, uint32_t* array, + bool& was_negative) { + uint64_t high; + uint64_t low; + const int64_t highbits = value.high_bits(); + const uint64_t lowbits = value.low_bits(); + + if (highbits < 0) { + low = ~lowbits + 1; + high = static_cast(~highbits); + if (low == 0) { + ++high; + } + was_negative = true; + } else { + low = lowbits; + high = static_cast(highbits); + was_negative = false; + } + + if (high != 0) { + if (high > std::numeric_limits::max()) { + array[0] = static_cast(high >> 32); + array[1] = static_cast(high); + array[2] = static_cast(low >> 32); + array[3] = static_cast(low); + return 4; + } + + array[0] = static_cast(high); + array[1] = static_cast(low >> 32); + array[2] = static_cast(low); + return 3; + } + + if (low >= std::numeric_limits::max()) { + array[0] = static_cast(low >> 32); + array[1] = static_cast(low); + return 2; + } + + if (low == 0) { + return 0; + } + + array[0] = static_cast(low); + return 1; +} + +/// Shift the number in the array left by bits positions. +/// \param array the number to shift, must have length elements +/// \param length the number of entries in the array +/// \param bits the number of bits to shift (0 <= bits < 32) +static void ShiftArrayLeft(uint32_t* array, int64_t length, int64_t bits) { + if (length > 0 && bits != 0) { + for (int64_t i = 0; i < length - 1; ++i) { + array[i] = (array[i] << bits) | (array[i + 1] >> (32 - bits)); + } + array[length - 1] <<= bits; + } +} + +/// Shift the number in the array right by bits positions. 
+/// Shift the number in the array right by bits positions.
+/// \param array the number to shift, must have length elements
+/// \param length the number of entries in the array
+/// \param bits the number of bits to shift (0 <= bits < 32)
+static void ShiftArrayRight(uint32_t* array, int64_t length, int64_t bits) {
+  if (length > 0 && bits != 0) {
+    for (int64_t i = length - 1; i > 0; --i) {
+      array[i] = (array[i] >> bits) | (array[i - 1] << (32 - bits));
+    }
+    array[0] >>= bits;
+  }
+}
+
+/// \brief Fix the signs of the result and remainder at the end of the division based on
+/// the signs of the dividend and divisor.
+static void FixDivisionSigns(BasicDecimal128* result, BasicDecimal128* remainder,
+                             bool dividend_was_negative, bool divisor_was_negative) {
+  if (dividend_was_negative != divisor_was_negative) {
+    result->Negate();
+  }
+
+  if (dividend_was_negative) {
+    remainder->Negate();
+  }
+}
+
+/// \brief Build a BasicDecimal128 from a list of ints.
+static DecimalStatus BuildFromArray(BasicDecimal128* value, uint32_t* array,
+                                    int64_t length) {
+  switch (length) {
+    case 0:
+      *value = {static_cast<int64_t>(0)};
+      break;
+    case 1:
+      *value = {static_cast<int64_t>(array[0])};
+      break;
+    case 2:
+      *value = {static_cast<int64_t>(0),
+                (static_cast<uint64_t>(array[0]) << 32) + array[1]};
+      break;
+    case 3:
+      *value = {static_cast<int64_t>(array[0]),
+                (static_cast<uint64_t>(array[1]) << 32) + array[2]};
+      break;
+    case 4:
+      *value = {(static_cast<int64_t>(array[0]) << 32) + array[1],
+                (static_cast<uint64_t>(array[2]) << 32) + array[3]};
+      break;
+    case 5:
+      if (array[0] != 0) {
+        return DecimalStatus::kOverflow;
+      }
+      *value = {(static_cast<int64_t>(array[1]) << 32) + array[2],
+                (static_cast<uint64_t>(array[3]) << 32) + array[4]};
+      break;
+    default:
+      return DecimalStatus::kOverflow;
+  }
+
+  return DecimalStatus::kSuccess;
+}
+
+/// \brief Do a division where the divisor fits into a single 32 bit value.
+static DecimalStatus SingleDivide(const uint32_t* dividend, int64_t dividend_length,
+                                  uint32_t divisor, BasicDecimal128* remainder,
+                                  bool dividend_was_negative, bool divisor_was_negative,
+                                  BasicDecimal128* result) {
+  uint64_t r = 0;
+  uint32_t result_array[5];
+  for (int64_t j = 0; j < dividend_length; j++) {
+    r <<= 32;
+    r += dividend[j];
+    result_array[j] = static_cast<uint32_t>(r / divisor);
+    r %= divisor;
+  }
+  auto status = BuildFromArray(result, result_array, dividend_length);
+  if (status != DecimalStatus::kSuccess) {
+    return status;
+  }
+
+  *remainder = static_cast<int32_t>(r);
+  FixDivisionSigns(result, remainder, dividend_was_negative, divisor_was_negative);
+  return DecimalStatus::kSuccess;
+}
+DecimalStatus BasicDecimal128::Divide(const BasicDecimal128& divisor,
+                                      BasicDecimal128* result,
+                                      BasicDecimal128* remainder) const {
+  // Split the dividend and divisor into integer pieces so that we can
+  // work on them.
+  uint32_t dividend_array[5];
+  uint32_t divisor_array[4];
+  bool dividend_was_negative;
+  bool divisor_was_negative;
+  // leave an extra zero before the dividend
+  dividend_array[0] = 0;
+  int64_t dividend_length =
+      FillInArray(*this, dividend_array + 1, dividend_was_negative) + 1;
+  int64_t divisor_length = FillInArray(divisor, divisor_array, divisor_was_negative);
+
+  // Handle some of the easy cases.
+  if (dividend_length <= divisor_length) {
+    *remainder = *this;
+    *result = 0;
+    return DecimalStatus::kSuccess;
+  }
+
+  if (divisor_length == 0) {
+    return DecimalStatus::kDivideByZero;
+  }
+
+  if (divisor_length == 1) {
+    return SingleDivide(dividend_array, dividend_length, divisor_array[0], remainder,
+                        dividend_was_negative, divisor_was_negative, result);
+  }
+
+  int64_t result_length = dividend_length - divisor_length;
+  uint32_t result_array[4];
+
+  // Normalize by shifting both by a multiple of 2 so that
+  // the digit guessing is better. The requirement is that
+  // divisor_array[0] is greater than 2**31.
+  int64_t normalize_bits = BitUtil::CountLeadingZeros(divisor_array[0]);
+  ShiftArrayLeft(divisor_array, divisor_length, normalize_bits);
+  ShiftArrayLeft(dividend_array, dividend_length, normalize_bits);
+
+  // compute each digit in the result
+  for (int64_t j = 0; j < result_length; ++j) {
+    // Guess the next digit. At worst it is two too large
+    uint32_t guess = std::numeric_limits<uint32_t>::max();
+    const auto high_dividend =
+        static_cast<uint64_t>(dividend_array[j]) << 32 | dividend_array[j + 1];
+    if (dividend_array[j] != divisor_array[0]) {
+      guess = static_cast<uint32_t>(high_dividend / divisor_array[0]);
+    }
+
+    // catch all of the cases where guess is two too large and most of the
+    // cases where it is one too large
+    auto rhat = static_cast<uint32_t>(high_dividend -
+                                      guess * static_cast<uint64_t>(divisor_array[0]));
+    while (static_cast<uint64_t>(divisor_array[1]) * guess >
+           (static_cast<uint64_t>(rhat) << 32) + dividend_array[j + 2]) {
+      --guess;
+      rhat += divisor_array[0];
+      if (static_cast<uint64_t>(rhat) < divisor_array[0]) {
+        break;
+      }
+    }
+
+    // subtract off the guess * divisor from the dividend
+    uint64_t mult = 0;
+    for (int64_t i = divisor_length - 1; i >= 0; --i) {
+      mult += static_cast<uint64_t>(guess) * divisor_array[i];
+      uint32_t prev = dividend_array[j + i + 1];
+      dividend_array[j + i + 1] -= static_cast<uint32_t>(mult);
+      mult >>= 32;
+      if (dividend_array[j + i + 1] > prev) {
+        ++mult;
+      }
+    }
+    uint32_t prev = dividend_array[j];
+    dividend_array[j] -= static_cast<uint32_t>(mult);
+
+    // if guess was too big, we add back divisor
+    if (dividend_array[j] > prev) {
+      --guess;
+      uint32_t carry = 0;
+      for (int64_t i = divisor_length - 1; i >= 0; --i) {
+        const auto sum =
+            static_cast<uint64_t>(divisor_array[i]) + dividend_array[j + i + 1] + carry;
+        dividend_array[j + i + 1] = static_cast<uint32_t>(sum);
+        carry = static_cast<uint32_t>(sum >> 32);
+      }
+      dividend_array[j] += carry;
+    }
+
+    result_array[j] = guess;
+  }
+
+  // denormalize the remainder
+  ShiftArrayRight(dividend_array, dividend_length, normalize_bits);
+
+  // return result and remainder
+  auto status = BuildFromArray(result, result_array, result_length);
+  if (status != DecimalStatus::kSuccess) {
+    return status;
+  }
+  status = BuildFromArray(remainder, dividend_array, dividend_length);
+  if (status != DecimalStatus::kSuccess) {
+    return status;
+  }
+
+  FixDivisionSigns(result, remainder, dividend_was_negative, divisor_was_negative);
+  return DecimalStatus::kSuccess;
+}
+
+bool operator==(const BasicDecimal128& left, const BasicDecimal128& right) {
+  return left.high_bits() == right.high_bits() && left.low_bits() == right.low_bits();
+}
+
+bool operator!=(const BasicDecimal128& left, const BasicDecimal128& right) {
+  return !operator==(left, right);
+}
+
+bool operator<(const BasicDecimal128& left, const BasicDecimal128& right) {
+  return left.high_bits() < right.high_bits() ||
+         (left.high_bits() == right.high_bits() && left.low_bits() < right.low_bits());
+}
+
+bool operator<=(const BasicDecimal128& left, const
BasicDecimal128& right) { + return !operator>(left, right); +} + +bool operator>(const BasicDecimal128& left, const BasicDecimal128& right) { + return operator<(right, left); +} + +bool operator>=(const BasicDecimal128& left, const BasicDecimal128& right) { + return !operator<(left, right); +} + +BasicDecimal128 operator-(const BasicDecimal128& operand) { + BasicDecimal128 result(operand.high_bits(), operand.low_bits()); + return result.Negate(); +} + +BasicDecimal128 operator~(const BasicDecimal128& operand) { + BasicDecimal128 result(~operand.high_bits(), ~operand.low_bits()); + return result; +} + +BasicDecimal128 operator+(const BasicDecimal128& left, const BasicDecimal128& right) { + BasicDecimal128 result(left.high_bits(), left.low_bits()); + result += right; + return result; +} + +BasicDecimal128 operator-(const BasicDecimal128& left, const BasicDecimal128& right) { + BasicDecimal128 result(left.high_bits(), left.low_bits()); + result -= right; + return result; +} + +BasicDecimal128 operator*(const BasicDecimal128& left, const BasicDecimal128& right) { + BasicDecimal128 result(left.high_bits(), left.low_bits()); + result *= right; + return result; +} + +BasicDecimal128 operator/(const BasicDecimal128& left, const BasicDecimal128& right) { + BasicDecimal128 remainder; + BasicDecimal128 result; + auto s = left.Divide(right, &result, &remainder); + DCHECK_EQ(s, DecimalStatus::kSuccess); + return result; +} + +BasicDecimal128 operator%(const BasicDecimal128& left, const BasicDecimal128& right) { + BasicDecimal128 remainder; + BasicDecimal128 result; + auto s = left.Divide(right, &result, &remainder); + DCHECK_EQ(s, DecimalStatus::kSuccess); + return remainder; +} + +static bool RescaleWouldCauseDataLoss(const BasicDecimal128& value, int32_t delta_scale, + int32_t abs_delta_scale, BasicDecimal128* result) { + BasicDecimal128 multiplier(ScaleMultipliers[abs_delta_scale]); + + if (delta_scale < 0) { + DCHECK_NE(multiplier, 0); + BasicDecimal128 remainder; + auto status = value.Divide(multiplier, result, &remainder); + DCHECK_EQ(status, DecimalStatus::kSuccess); + return remainder != 0; + } + + *result = value * multiplier; + return (value < 0) ? 
*result > value : *result < value;
+}
+
+DecimalStatus BasicDecimal128::Rescale(int32_t original_scale, int32_t new_scale,
+                                       BasicDecimal128* out) const {
+  DCHECK_NE(out, nullptr);
+  DCHECK_NE(original_scale, new_scale);
+
+  const int32_t delta_scale = new_scale - original_scale;
+  const int32_t abs_delta_scale = std::abs(delta_scale);
+
+  DCHECK_GE(abs_delta_scale, 1);
+  DCHECK_LE(abs_delta_scale, 38);
+
+  BasicDecimal128 result(*this);
+  const bool rescale_would_cause_data_loss =
+      RescaleWouldCauseDataLoss(result, delta_scale, abs_delta_scale, out);
+
+  // Fail if we overflow or truncate
+  if (ARROW_PREDICT_FALSE(rescale_would_cause_data_loss)) {
+    return DecimalStatus::kRescaleDataLoss;
+  }
+
+  return DecimalStatus::kSuccess;
+}
+
+void BasicDecimal128::GetWholeAndFraction(int32_t scale, BasicDecimal128* whole,
+                                          BasicDecimal128* fraction) const {
+  DCHECK_GE(scale, 0);
+  DCHECK_LE(scale, 38);
+
+  BasicDecimal128 multiplier(ScaleMultipliers[scale]);
+  auto s = Divide(multiplier, whole, fraction);
+  DCHECK_EQ(s, DecimalStatus::kSuccess);
+}
+
+const BasicDecimal128& BasicDecimal128::GetScaleMultiplier(int32_t scale) {
+  DCHECK_GE(scale, 0);
+  DCHECK_LE(scale, 38);
+
+  return ScaleMultipliers[scale];
+}
+
+BasicDecimal128 BasicDecimal128::IncreaseScaleBy(int32_t increase_by) const {
+  DCHECK_GE(increase_by, 0);
+  DCHECK_LE(increase_by, 38);
+
+  return (*this) * ScaleMultipliers[increase_by];
+}
+
+BasicDecimal128 BasicDecimal128::ReduceScaleBy(int32_t reduce_by, bool round) const {
+  DCHECK_GE(reduce_by, 0);
+  DCHECK_LE(reduce_by, 38);
+
+  BasicDecimal128 divisor(ScaleMultipliers[reduce_by]);
+  BasicDecimal128 result;
+  BasicDecimal128 remainder;
+  auto s = Divide(divisor, &result, &remainder);
+  DCHECK_EQ(s, DecimalStatus::kSuccess);
+  if (round) {
+    auto divisor_half = ScaleMultipliersHalf[reduce_by];
+    if (remainder.Abs() >= divisor_half) {
+      if (result > 0) {
+        result += 1;
+      } else {
+        result -= 1;
+      }
+    }
+  }
+  return result;
+}
+
+int32_t BasicDecimal128::CountLeadingBinaryZeros() const {
+  DCHECK_GE(*this, BasicDecimal128(0));
+
+  if (high_bits_ == 0) {
+    return BitUtil::CountLeadingZeros(low_bits_) + 64;
+  } else {
+    return BitUtil::CountLeadingZeros(static_cast<uint64_t>(high_bits_));
+  }
+}
+
+}  // namespace arrow
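The division and rescaling semantics implemented above are easiest to see in a small usage sketch (illustrative only; this `main` and the asserted values are editorial, not part of the patch):

#include <cassert>

#include "arrow/util/basic_decimal.h"

int main() {
  using arrow::BasicDecimal128;
  using arrow::DecimalStatus;

  // Division truncates toward zero and the remainder takes the dividend's
  // sign: -21 / 5 -> -4, remainder -1.
  BasicDecimal128 result, remainder;
  auto s = BasicDecimal128(-21).Divide(BasicDecimal128(5), &result, &remainder);
  assert(s == DecimalStatus::kSuccess);
  assert(result == BasicDecimal128(-4) && remainder == BasicDecimal128(-1));

  // Rescale from scale 0 to scale 2 multiplies by 10^2; truncation or
  // overflow would be reported as kRescaleDataLoss.
  BasicDecimal128 rescaled;
  assert(BasicDecimal128(123).Rescale(0, 2, &rescaled) == DecimalStatus::kSuccess);
  assert(rescaled == BasicDecimal128(12300));

  // ReduceScaleBy(2) drops two decimal digits, rounding on the dropped part:
  // 12345 (i.e. 123.45 at scale 2) becomes 123 since 45 < 50.
  assert(BasicDecimal128(12345).ReduceScaleBy(2, true) == BasicDecimal128(123));
  return 0;
}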
diff --git a/cpp/src/arrow/util/basic_decimal.h b/cpp/src/arrow/util/basic_decimal.h
new file mode 100644
index 0000000000000..e19cb14a00ffb
--- /dev/null
+++ b/cpp/src/arrow/util/basic_decimal.h
@@ -0,0 +1,166 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <array>
+#include <cstdint>
+#include <limits>
+#include <string>
+#include <type_traits>
+
+#include "arrow/util/macros.h"
+#include "arrow/util/type_traits.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+enum class DecimalStatus {
+  kSuccess,
+  kDivideByZero,
+  kOverflow,
+  kRescaleDataLoss,
+};
+
+/// Represents a signed 128-bit integer in two's complement.
+///
+/// This class is also compiled into LLVM IR - so, it should not have cpp references like
+/// streams and boost.
+class ARROW_EXPORT BasicDecimal128 {
+ public:
+  /// \brief Create a BasicDecimal128 from the two's complement representation.
+  constexpr BasicDecimal128(int64_t high, uint64_t low) noexcept
+      : low_bits_(low), high_bits_(high) {}
+
+  /// \brief Empty constructor creates a BasicDecimal128 with a value of 0.
+  constexpr BasicDecimal128() noexcept : BasicDecimal128(0, 0) {}
+
+  /// \brief Convert any integer value into a BasicDecimal128.
+  template <typename T,
+            typename = typename std::enable_if<std::is_integral<T>::value, T>::type>
+  constexpr BasicDecimal128(T value) noexcept
+      : BasicDecimal128(static_cast<int64_t>(value) >= 0 ? 0 : -1,
+                        static_cast<uint64_t>(value)) {}
+
+  /// \brief Create a BasicDecimal128 from an array of bytes. Bytes are assumed to be in
+  /// little-endian byte order.
+  explicit BasicDecimal128(const uint8_t* bytes);
+
+  /// \brief Negate the current value (in-place)
+  BasicDecimal128& Negate();
+
+  /// \brief Absolute value (in-place)
+  BasicDecimal128& Abs();
+
+  /// \brief Add a number to this one. The result is truncated to 128 bits.
+  BasicDecimal128& operator+=(const BasicDecimal128& right);
+
+  /// \brief Subtract a number from this one. The result is truncated to 128 bits.
+  BasicDecimal128& operator-=(const BasicDecimal128& right);
+
+  /// \brief Multiply this number by another number. The result is truncated to 128 bits.
+  BasicDecimal128& operator*=(const BasicDecimal128& right);
+
+  /// Divide this number by right and return the result.
+  ///
+  /// This operation is not destructive.
+  /// The answer rounds to zero. Signs work like:
+  ///    21 /  5 ->  4,  1
+  ///   -21 /  5 -> -4, -1
+  ///    21 / -5 -> -4,  1
+  ///   -21 / -5 ->  4, -1
+  /// \param[in] divisor the number to divide by
+  /// \param[out] result the quotient
+  /// \param[out] remainder the remainder after the division
+  DecimalStatus Divide(const BasicDecimal128& divisor, BasicDecimal128* result,
+                       BasicDecimal128* remainder) const;
+
+  /// \brief In-place division.
+  BasicDecimal128& operator/=(const BasicDecimal128& right);
+
+  /// \brief Bitwise "or" between two BasicDecimal128.
+  BasicDecimal128& operator|=(const BasicDecimal128& right);
+
+  /// \brief Bitwise "and" between two BasicDecimal128.
+  BasicDecimal128& operator&=(const BasicDecimal128& right);
+
+  /// \brief Shift left by the given number of bits.
+  BasicDecimal128& operator<<=(uint32_t bits);
+
+  /// \brief Shift right by the given number of bits. Negative values will
+  /// sign extend.
+  BasicDecimal128& operator>>=(uint32_t bits);
+
+  /// \brief Get the high bits of the two's complement representation of the number.
+  inline int64_t high_bits() const { return high_bits_; }
+
+  /// \brief Get the low bits of the two's complement representation of the number.
+  inline uint64_t low_bits() const { return low_bits_; }
+
+  /// \brief Return the raw bytes of the value in little-endian byte order.
+  std::array<uint8_t, 16> ToBytes() const;
+  void ToBytes(uint8_t* out) const;
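  // Illustrative example (editorial, not part of the patch): with scale = 2
  // the stored value 12345 denotes 123.45; GetWholeAndFraction(2, &w, &f)
  // below yields w == 123, f == 45, while -12345 yields w == -123, f == -45
  // (the fraction keeps the sign of the value).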
+
+  /// \brief Separate the integer and fractional parts for the given scale.
+  void GetWholeAndFraction(int32_t scale, BasicDecimal128* whole,
+                           BasicDecimal128* fraction) const;
+
+  /// \brief Scale multiplier for given scale value.
+  static const BasicDecimal128& GetScaleMultiplier(int32_t scale);
+
+  /// \brief Convert BasicDecimal128 from one scale to another
+  DecimalStatus Rescale(int32_t original_scale, int32_t new_scale,
+                        BasicDecimal128* out) const;
+
+  /// \brief Scale up.
+  BasicDecimal128 IncreaseScaleBy(int32_t increase_by) const;
+
+  /// \brief Scale down.
+  /// - If 'round' is true, the right-most digits are dropped and the result value is
+  ///   rounded up (+1 for +ve, -1 for -ve) based on the value of the dropped digits
+  ///   (>= 10^reduce_by / 2).
+  /// - If 'round' is false, the right-most digits are simply dropped.
+  BasicDecimal128 ReduceScaleBy(int32_t reduce_by, bool round = true) const;
+
+  /// \brief count the number of leading binary zeroes.
+  int32_t CountLeadingBinaryZeros() const;
+
+ private:
+  uint64_t low_bits_;
+  int64_t high_bits_;
+};
+
+ARROW_EXPORT bool operator==(const BasicDecimal128& left, const BasicDecimal128& right);
+ARROW_EXPORT bool operator!=(const BasicDecimal128& left, const BasicDecimal128& right);
+ARROW_EXPORT bool operator<(const BasicDecimal128& left, const BasicDecimal128& right);
+ARROW_EXPORT bool operator<=(const BasicDecimal128& left, const BasicDecimal128& right);
+ARROW_EXPORT bool operator>(const BasicDecimal128& left, const BasicDecimal128& right);
+ARROW_EXPORT bool operator>=(const BasicDecimal128& left, const BasicDecimal128& right);
+
+ARROW_EXPORT BasicDecimal128 operator-(const BasicDecimal128& operand);
+ARROW_EXPORT BasicDecimal128 operator~(const BasicDecimal128& operand);
+ARROW_EXPORT BasicDecimal128 operator+(const BasicDecimal128& left,
+                                       const BasicDecimal128& right);
+ARROW_EXPORT BasicDecimal128 operator-(const BasicDecimal128& left,
+                                       const BasicDecimal128& right);
+ARROW_EXPORT BasicDecimal128 operator*(const BasicDecimal128& left,
+                                       const BasicDecimal128& right);
+ARROW_EXPORT BasicDecimal128 operator/(const BasicDecimal128& left,
+                                       const BasicDecimal128& right);
+ARROW_EXPORT BasicDecimal128 operator%(const BasicDecimal128& left,
+                                       const BasicDecimal128& right);
+
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/bit-stream-utils.h b/cpp/src/arrow/util/bit-stream-utils.h
index ff215e488b4a3..ad86ee87c9fda 100644
--- a/cpp/src/arrow/util/bit-stream-utils.h
+++ b/cpp/src/arrow/util/bit-stream-utils.h
@@ -110,7 +110,12 @@ class BitReader {
     memcpy(&buffered_values_, buffer_ + byte_offset_, num_bytes);
   }
 
-  BitReader() : buffer_(NULL), max_bytes_(0) {}
+  BitReader()
+      : buffer_(NULL),
+        max_bytes_(0),
+        buffered_values_(0),
+        byte_offset_(0),
+        bit_offset_(0) {}
 
   void Reset(const uint8_t* buffer, int buffer_len) {
     buffer_ = buffer;
@@ -392,7 +397,8 @@ inline bool BitReader::GetVlqInt(int32_t* v) {
 }
 
 inline bool BitWriter::PutZigZagVlqInt(int32_t v) {
-  uint32_t u = (v << 1) ^ (v >> 31);
+  // Note negative left shift is undefined
+  uint32_t u = (static_cast<uint32_t>(v) << 1) ^ (v >> 31);
   return PutVlqInt(u);
 }
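The zig-zag transform fixed above interleaves the signed range so that small magnitudes get small codes; a standalone check (editorial sketch, not from the patch; the ZigZag helper name is hypothetical):

#include <cassert>
#include <cstdint>

// Zig-zag maps 0, -1, 1, -2, ... to 0, 1, 2, 3, ... so small magnitudes
// encode into few VLQ bytes; casting before the shift avoids the undefined
// left shift of a negative value.
static uint32_t ZigZag(int32_t v) {
  return (static_cast<uint32_t>(v) << 1) ^ static_cast<uint32_t>(v >> 31);
}

int main() {
  assert(ZigZag(0) == 0);
  assert(ZigZag(-1) == 1);
  assert(ZigZag(1) == 2);
  assert(ZigZag(-1234) == 2467);
  return 0;
}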
diff --git a/cpp/src/arrow/util/bit-util-benchmark.cc b/cpp/src/arrow/util/bit-util-benchmark.cc
index beb48df278acc..00093a2cf7b59 100644
--- a/cpp/src/arrow/util/bit-util-benchmark.cc
+++ b/cpp/src/arrow/util/bit-util-benchmark.cc
@@ -39,11 +39,7 @@ class NaiveBitmapReader {
   NaiveBitmapReader(const uint8_t* bitmap, int64_t start_offset, int64_t length)
       : bitmap_(bitmap), position_(0) {}
 
-  bool IsSet() const {
-    const int64_t byte_offset = position_ / 8;
-    const int64_t bit_offset = position_ % 8;
-    return (bitmap_[byte_offset] & (1 << bit_offset)) == 0;
-  }
+  bool IsSet() const { return BitUtil::GetBit(bitmap_, position_); }
 
   bool IsNotSet() const { return !IsSet(); }
 
@@ -51,7 +47,7 @@ class NaiveBitmapReader {
 
  private:
   const uint8_t* bitmap_;
-  int64_t position_;
+  uint64_t position_;
 };
 
 // A naive bitmap writer implementation, meant as a baseline against
@@ -65,13 +61,15 @@ class NaiveBitmapWriter {
   void Set() {
     const int64_t byte_offset = position_ / 8;
    const int64_t bit_offset = position_ % 8;
-    bitmap_[byte_offset] |= static_cast<uint8_t>(1 << bit_offset);
+    auto bit_set_mask = (1U << bit_offset);
+    bitmap_[byte_offset] = static_cast<uint8_t>(bitmap_[byte_offset] | bit_set_mask);
   }
 
   void Clear() {
     const int64_t byte_offset = position_ / 8;
     const int64_t bit_offset = position_ % 8;
-    bitmap_[byte_offset] &= 0xFF ^ static_cast<uint8_t>(1 << bit_offset);
+    auto bit_clear_mask = 0xFFU ^ (1U << bit_offset);
+    bitmap_[byte_offset] = static_cast<uint8_t>(bitmap_[byte_offset] & bit_clear_mask);
   }
 
   void Next() { ++position_; }
@@ -100,7 +98,7 @@ static void BenchmarkBitmapReader(benchmark::State& state, int64_t nbytes) {
   const int64_t num_bits = nbytes * 8;
   const uint8_t* bitmap = buffer->data();
 
-  while (state.KeepRunning()) {
+  for (auto _ : state) {
     {
       BitmapReaderType reader(bitmap, 0, num_bits);
       int64_t total = 0;
@@ -240,11 +238,11 @@ BENCHMARK(BM_CopyBitmap)
     ->Unit(benchmark::kMicrosecond);
 
 BENCHMARK(BM_NaiveBitmapReader)
-    ->Args({100000})
-    ->MinTime(1.0)
+    ->Args({1000000})
+    ->MinTime(5.0)
     ->Unit(benchmark::kMicrosecond);
 
-BENCHMARK(BM_BitmapReader)->Args({100000})->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+BENCHMARK(BM_BitmapReader)->Args({1000000})->MinTime(5.0)->Unit(benchmark::kMicrosecond);
 
 BENCHMARK(BM_NaiveBitmapWriter)
     ->Args({100000})
diff --git a/cpp/src/arrow/util/bit-util-test.cc b/cpp/src/arrow/util/bit-util-test.cc
index 5f181e9b7b14c..6bcb6ea59266b 100644
--- a/cpp/src/arrow/util/bit-util-test.cc
+++ b/cpp/src/arrow/util/bit-util-test.cc
@@ -21,7 +21,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
@@ -167,33 +166,40 @@ TEST(BitmapReader, DoesNotReadOutOfBounds) {
 }
 
 TEST(BitmapWriter, NormalOperation) {
-  {
-    uint8_t bitmap[] = {0, 0, 0, 0};
-    auto writer = internal::BitmapWriter(bitmap, 0, 12);
-    WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1});
-    // {0b00110110, 0b1010, 0, 0}
-    ASSERT_BYTES_EQ(bitmap, {0x36, 0x0a, 0, 0});
-  }
-  {
-    uint8_t bitmap[] = {0xff, 0xff, 0xff, 0xff};
-    auto writer = internal::BitmapWriter(bitmap, 0, 12);
-    WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1});
-    // {0b00110110, 0b11111010, 0xff, 0xff}
-    ASSERT_BYTES_EQ(bitmap, {0x36, 0xfa, 0xff, 0xff});
-  }
-  {
-    uint8_t bitmap[] = {0, 0, 0, 0};
-    auto writer = internal::BitmapWriter(bitmap, 3, 12);
-    WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1});
-    // {0b10110000, 0b01010001, 0, 0}
-    ASSERT_BYTES_EQ(bitmap, {0xb0, 0x51, 0, 0});
-  }
-  {
-    uint8_t bitmap[] = {0, 0, 0, 0};
-    auto writer = internal::BitmapWriter(bitmap, 20, 12);
-    WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1});
-    // {0, 0, 0b01100000, 0b10100011}
-    ASSERT_BYTES_EQ(bitmap, {0, 0, 0x60, 0xa3});
+  for (const auto fill_byte_int : {0x00, 0xff}) {
+    const uint8_t fill_byte = static_cast<uint8_t>(fill_byte_int);
+    {
+      uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
+      auto writer = internal::BitmapWriter(bitmap, 0, 12);
+      WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1});
+      // {0b00110110, 0b....1010, ........, ........}
+      ASSERT_BYTES_EQ(bitmap, {0x36, static_cast<uint8_t>(0x0a | (fill_byte & 0xf0)),
+                               fill_byte, fill_byte});
+    }
+    {
+      uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
+      auto writer = internal::BitmapWriter(bitmap, 3, 12);
+      WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1});
+      // {0b10110..., 0b.1010001, ........, ........}
+      ASSERT_BYTES_EQ(bitmap, {static_cast<uint8_t>(0xb0 | (fill_byte & 0x07)),
+                               static_cast<uint8_t>(0x51 | (fill_byte & 0x80)), fill_byte,
+                               fill_byte});
+    }
+    {
+      uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
+      auto writer = internal::BitmapWriter(bitmap, 20, 12);
+      WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1});
+      // {........, ........, 0b0110...., 0b10100011}
+      ASSERT_BYTES_EQ(bitmap, {fill_byte, fill_byte,
+                               static_cast<uint8_t>(0x60 | (fill_byte & 0x0f)), 0xa3});
+    }
+    // 0-length writes
+    for (int64_t pos = 0; pos < 32; ++pos) {
+      uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
+      auto writer = internal::BitmapWriter(bitmap, pos, 0);
+      WriteVectorToWriter(writer, {});
+      ASSERT_BYTES_EQ(bitmap, {fill_byte, fill_byte, fill_byte, fill_byte});
+    }
   }
 }
@@ -266,6 +272,10 @@ TEST(FirstTimeBitmapWriter, NormalOperation) {
   }
   {
     uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
+    {
+      auto writer = internal::FirstTimeBitmapWriter(bitmap, 4, 0);
+      WriteVectorToWriter(writer, {});
+    }
     {
       auto writer = internal::FirstTimeBitmapWriter(bitmap, 4, 6);
       WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1});
@@ -274,6 +284,10 @@ TEST(FirstTimeBitmapWriter, NormalOperation) {
       auto writer = internal::FirstTimeBitmapWriter(bitmap, 10, 3);
       WriteVectorToWriter(writer, {0, 0, 0});
     }
+    {
+      auto writer = internal::FirstTimeBitmapWriter(bitmap, 13, 0);
+      WriteVectorToWriter(writer, {});
+    }
     {
       auto writer = internal::FirstTimeBitmapWriter(bitmap, 13, 3);
       WriteVectorToWriter(writer, {1, 0, 1});
@@ -319,8 +333,8 @@ TYPED_TEST(TestGenerateBits, NormalOperation) {
   for (const int64_t start_offset : start_offsets) {
     for (const int64_t length : lengths) {
       for (const uint8_t fill_byte : fill_bytes) {
-        uint8_t bitmap[kSourceSize];
-        memset(bitmap, fill_byte, kSourceSize);
+        uint8_t bitmap[kSourceSize + 1];
+        memset(bitmap, fill_byte, kSourceSize + 1);
         // First call GenerateBits
         {
           int64_t ncalled = 0;
@@ -344,7 +358,7 @@ TYPED_TEST(TestGenerateBits, NormalOperation) {
             result_reader.Next();
           }
         }
-        // Check bits preceding and following generated contents weren't clobbered
+        // Check bits preceding generated contents weren't clobbered
         {
           internal::BitmapReader reader_before(bitmap, 0, start_offset);
           for (int64_t i = 0; i < start_offset; ++i) {
             ASSERT_EQ(reader_before.IsSet(), BitUtil::GetBit(bitmap, i))
                 << "mismatch at preceding bit #" << start_offset - i;
           }
         }
@@ -352,6 +366,9 @@ TYPED_TEST(TestGenerateBits, NormalOperation) {
         }
+        // Check the byte following generated contents wasn't clobbered
+        auto byte_after = bitmap[BitUtil::CeilDiv(start_offset + length, 8)];
+        ASSERT_EQ(byte_after, fill_byte);
       }
     }
   }
@@ -499,6 +516,43 @@ TEST(BitUtilTests, TestCountSetBits) {
   }
 }
 
+TEST(BitUtilTests, TestSetBitsTo) {
+  using BitUtil::SetBitsTo;
+  for (const auto fill_byte_int : {0x00, 0xff}) {
+    const uint8_t fill_byte = static_cast<uint8_t>(fill_byte_int);
+    {
+      // test set within a byte
+      uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
+      SetBitsTo(bitmap, 2, 2, true);
+      SetBitsTo(bitmap, 4, 2, false);
+      ASSERT_BYTES_EQ(bitmap, {static_cast<uint8_t>((fill_byte & ~0x3C) | 0xC)});
+    }
+    {
+      // test straddling a single byte boundary
+      uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
+      SetBitsTo(bitmap, 4, 7, true);
+      SetBitsTo(bitmap, 11, 7, false);
+      ASSERT_BYTES_EQ(bitmap, {static_cast<uint8_t>((fill_byte & 0xF) | 0xF0), 0x7,
+                               static_cast<uint8_t>(fill_byte & ~0x3)});
+    }
+    {
+      // test byte aligned end
+      uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
+      SetBitsTo(bitmap, 4, 4, true);
+      SetBitsTo(bitmap, 8, 8, false);
+      ASSERT_BYTES_EQ(bitmap,
+                      {static_cast<uint8_t>((fill_byte & 0xF) | 0xF0), 0x00, fill_byte});
+    }
+    {
+      // test byte aligned end, multiple bytes
+      uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
+      SetBitsTo(bitmap, 0, 24, false);
+      uint8_t false_byte = static_cast<uint8_t>(0);
+      ASSERT_BYTES_EQ(bitmap, {false_byte, false_byte, false_byte, fill_byte});
+    }
+  }
+}
+
 TEST(BitUtilTests, TestCopyBitmap) {
   const int kBufferSize = 1000;
@@ -734,6 +788,30 @@ TEST(BitUtil, CountLeadingZeros) {
   EXPECT_EQ(BitUtil::CountLeadingZeros(U64(ULLONG_MAX)), 0);
 }
 
+TEST(BitUtil, CountTrailingZeros) {
+  EXPECT_EQ(BitUtil::CountTrailingZeros(U32(0)), 32);
+  EXPECT_EQ(BitUtil::CountTrailingZeros(U32(1) << 31), 31);
+  EXPECT_EQ(BitUtil::CountTrailingZeros(U32(1) << 30), 30);
+  EXPECT_EQ(BitUtil::CountTrailingZeros(U32(1) << 29), 29);
+  EXPECT_EQ(BitUtil::CountTrailingZeros(U32(1) << 28), 28);
+  EXPECT_EQ(BitUtil::CountTrailingZeros(U32(8)), 3);
+  EXPECT_EQ(BitUtil::CountTrailingZeros(U32(4)), 2);
+  EXPECT_EQ(BitUtil::CountTrailingZeros(U32(2)), 1);
+  EXPECT_EQ(BitUtil::CountTrailingZeros(U32(1)), 0);
+  EXPECT_EQ(BitUtil::CountTrailingZeros(U32(ULONG_MAX)), 0);
+
+  EXPECT_EQ(BitUtil::CountTrailingZeros(U64(0)), 64);
+  EXPECT_EQ(BitUtil::CountTrailingZeros(U64(1) << 63), 63);
+  EXPECT_EQ(BitUtil::CountTrailingZeros(U64(1) << 62), 62);
+  EXPECT_EQ(BitUtil::CountTrailingZeros(U64(1) << 61), 61);
+  EXPECT_EQ(BitUtil::CountTrailingZeros(U64(1) << 60), 60);
+  EXPECT_EQ(BitUtil::CountTrailingZeros(U64(8)), 3);
+  EXPECT_EQ(BitUtil::CountTrailingZeros(U64(4)), 2);
+  EXPECT_EQ(BitUtil::CountTrailingZeros(U64(2)), 1);
+  EXPECT_EQ(BitUtil::CountTrailingZeros(U64(1)), 0);
+  EXPECT_EQ(BitUtil::CountTrailingZeros(U64(ULLONG_MAX)), 0);
+}
+
 #undef U32
 #undef U64
@@ -756,7 +834,9 @@ static void TestZigZag(int32_t v) {
 TEST(BitStreamUtil, ZigZag) {
   TestZigZag(0);
   TestZigZag(1);
+  TestZigZag(1234);
   TestZigZag(-1);
+  TestZigZag(-1234);
   TestZigZag(std::numeric_limits<int32_t>::max());
   TestZigZag(-std::numeric_limits<int32_t>::max());
 }
diff --git a/cpp/src/arrow/util/bit-util.cc b/cpp/src/arrow/util/bit-util.cc
index 7b7a7261fad13..862b1fd050bc5 100644
--- a/cpp/src/arrow/util/bit-util.cc
+++ b/cpp/src/arrow/util/bit-util.cc
@@ -196,8 +196,8 @@ Status TransferBitmap(MemoryPool* pool, const uint8_t* data, int64_t offset,
 
   TransferBitmap(data, offset, length, 0, dest);
 
-  // As we have freshly allocated this bitmap, we should take care of zeroing the remaing
-  // bits.
+  // As we have freshly allocated this bitmap, we should take care of zeroing the
+  // remaining bits.
   int64_t num_bytes = BitUtil::BytesForBits(length);
   int64_t bits_to_zero = num_bytes * 8 - length;
   for (int64_t i = length; i < length + bits_to_zero; ++i) {
diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h
index cd3d5b0c58ff8..bfdb44f255c53 100644
--- a/cpp/src/arrow/util/bit-util.h
+++ b/cpp/src/arrow/util/bit-util.h
@@ -45,6 +45,7 @@
 #if defined(_MSC_VER)
 #include <intrin.h>
 #pragma intrinsic(_BitScanReverse)
+#pragma intrinsic(_BitScanForward)
 #define ARROW_BYTE_SWAP64 _byteswap_uint64
 #define ARROW_BYTE_SWAP32 _byteswap_ulong
 #else
@@ -53,6 +54,7 @@
 #endif
 
 #include
+#include <cstring>
 #include
 #include
 #include
@@ -84,11 +86,11 @@ namespace BitUtil {
 //
 
 // Returns the ceil of value/divisor
-static inline int64_t CeilDiv(int64_t value, int64_t divisor) {
+constexpr int64_t CeilDiv(int64_t value, int64_t divisor) {
   return value / divisor + (value % divisor != 0);
 }
 
-static inline int64_t BytesForBits(int64_t bits) { return (bits + 7) >> 3; }
+constexpr int64_t BytesForBits(int64_t bits) { return (bits + 7) >> 3; }
 
 // Returns the smallest power of two that contains v. If v is already a
 // power of two, it is returned as is.
@@ -106,12 +108,12 @@ static inline int64_t NextPower2(int64_t n) {
   return n;
 }
 
-static inline bool IsMultipleOf64(int64_t n) { return (n & 63) == 0; }
+constexpr bool IsMultipleOf64(int64_t n) { return (n & 63) == 0; }
 
-static inline bool IsMultipleOf8(int64_t n) { return (n & 7) == 0; }
+constexpr bool IsMultipleOf8(int64_t n) { return (n & 7) == 0; }
 
 // Returns 'value' rounded up to the nearest multiple of 'factor'
-static inline int64_t RoundUp(int64_t value, int64_t factor) {
+constexpr int64_t RoundUp(int64_t value, int64_t factor) {
   return (value + (factor - 1)) / factor * factor;
 }
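// Illustrative arithmetic (editorial, not part of the patch):
// RoundUp(21, 8) computes (21 + 7) / 8 * 8 == 24, while RoundUpToPowerOf2
// below gets the same result branch-free for power-of-two factors:
// (21 + 7) & ~7 == 28 & ~7 == 24.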
@@ -119,16 +121,14 @@ static inline int64_t RoundUp(int64_t value, int64_t factor) {
 // is a power of two.
 // The result is undefined on overflow, i.e. if `value > 2**64 - factor`,
 // since we cannot return the correct result which would be 2**64.
-static inline int64_t RoundUpToPowerOf2(int64_t value, int64_t factor) {
+constexpr int64_t RoundUpToPowerOf2(int64_t value, int64_t factor) {
   // DCHECK((factor > 0) && ((factor & (factor - 1)) == 0));
   return (value + (factor - 1)) & ~(factor - 1);
 }
 
-static inline int64_t RoundUpToMultipleOf8(int64_t num) {
-  return RoundUpToPowerOf2(num, 8);
-}
+constexpr int64_t RoundUpToMultipleOf8(int64_t num) { return RoundUpToPowerOf2(num, 8); }
 
-static inline int64_t RoundUpToMultipleOf64(int64_t num) {
+constexpr int64_t RoundUpToMultipleOf64(int64_t num) {
   return RoundUpToPowerOf2(num, 64);
 }
 
@@ -183,6 +183,56 @@ static inline int CountLeadingZeros(uint64_t value) {
 #endif
 }
 
+static inline int CountTrailingZeros(uint32_t value) {
+#if defined(__clang__) || defined(__GNUC__)
+  if (value == 0) return 32;
+  return static_cast<int>(__builtin_ctzl(value));
+#elif defined(_MSC_VER)
+  unsigned long index;  // NOLINT
+  if (_BitScanForward(&index, value)) {
+    return static_cast<int>(index);
+  } else {
+    return 32;
+  }
+#else
+  int bitpos = 0;
+  if (value) {
+    while ((value & 1) == 0) {
+      value >>= 1;
+      ++bitpos;
+    }
+  } else {
+    bitpos = 32;
+  }
+  return bitpos;
+#endif
+}
+
+static inline int CountTrailingZeros(uint64_t value) {
+#if defined(__clang__) || defined(__GNUC__)
+  if (value == 0) return 64;
+  return static_cast<int>(__builtin_ctzll(value));
+#elif defined(_MSC_VER)
+  unsigned long index;  // NOLINT
+  if (_BitScanForward64(&index, value)) {
+    return static_cast<int>(index);
+  } else {
+    return 64;
+  }
+#else
+  int bitpos = 0;
+  if (value) {
+    while ((value & 1) == 0) {
+      value >>= 1;
+      ++bitpos;
+    }
+  } else {
+    bitpos = 64;
+  }
+  return bitpos;
+#endif
+}
+
 // Returns the minimum number of bits needed to represent an unsigned value
 static inline int NumRequiredBits(uint64_t x) { return 64 - CountLeadingZeros(x); }
 
@@ -310,8 +360,8 @@ static constexpr uint8_t kPrecedingBitmask[] = {0, 1, 3, 7, 15, 31, 63, 127};
 // the bitwise complement version of kPrecedingBitmask
 static constexpr uint8_t kTrailingBitmask[] = {255, 254, 252, 248, 240, 224, 192, 128};
 
-static inline bool GetBit(const uint8_t* bits, int64_t i) {
-  return (bits[i / 8] & kBitmask[i % 8]) != 0;
+static inline bool GetBit(const uint8_t* bits, uint64_t i) {
+  return (bits[i >> 3] >> (i & 0x07)) & 1;
 }
 
 static inline void ClearBit(uint8_t* bits, int64_t i) {
@@ -329,6 +379,48 @@ static inline void SetBitTo(uint8_t* bits, int64_t i, bool bit_is_set) {
                 kBitmask[i % 8];
 }
 
+/// \brief set or clear a range of bits quickly
+static inline void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length,
+                             bool bits_are_set) {
+  if (length == 0) return;
+
+  const auto i_begin = start_offset;
+  const auto i_end = start_offset + length;
+  const uint8_t fill_byte = static_cast<uint8_t>(-static_cast<uint8_t>(bits_are_set));
+
+  const auto bytes_begin = i_begin / 8;
+  const auto bytes_end = i_end / 8 + 1;
+
+  const auto first_byte_mask = kPrecedingBitmask[i_begin % 8];
+  const auto last_byte_mask = kTrailingBitmask[i_end % 8];
+
+  if (bytes_end == bytes_begin + 1) {
+    // set bits within a single byte
+    const auto only_byte_mask =
+        i_end % 8 == 0 ? first_byte_mask
+                       : static_cast<uint8_t>(first_byte_mask | last_byte_mask);
+    bits[bytes_begin] &= only_byte_mask;
+    bits[bytes_begin] |= static_cast<uint8_t>(fill_byte & ~only_byte_mask);
+    return;
+  }
+
+  // set/clear trailing bits of first byte
+  bits[bytes_begin] &= first_byte_mask;
+  bits[bytes_begin] |= static_cast<uint8_t>(fill_byte & ~first_byte_mask);
+
+  if (bytes_end - bytes_begin > 2) {
+    // set/clear whole bytes
+    std::memset(bits + bytes_begin + 1, fill_byte,
+                static_cast<size_t>(bytes_end - bytes_begin - 2));
+  }
+
+  if (i_end % 8 == 0) return;
+
+  // set/clear leading bits of last byte
+  bits[bytes_end - 1] &= last_byte_mask;
+  bits[bytes_end - 1] |= static_cast<uint8_t>(fill_byte & ~last_byte_mask);
+}
+
 /// \brief Convert vector of bytes to bitmap buffer
 ARROW_EXPORT
 Status BytesToBits(const std::vector<uint8_t>&, MemoryPool*, std::shared_ptr<Buffer>*);
@@ -409,7 +501,7 @@ class BitmapWriter {
 
   void Finish() {
     // Store current byte if we didn't go past bitmap storage
-    if (bit_mask_ != 0x01 || position_ < length_) {
+    if (length_ > 0 && (bit_mask_ != 0x01 || position_ < length_)) {
       bitmap_[byte_offset_] = current_byte_;
     }
   }
@@ -461,7 +553,7 @@ class FirstTimeBitmapWriter {
 
   void Finish() {
     // Store current byte if we didn't go past bitmap storage
-    if (bit_mask_ != 0x01 || position_ < length_) {
+    if (length_ > 0 && (bit_mask_ != 0x01 || position_ < length_)) {
      bitmap_[byte_offset_] = current_byte_;
     }
  }
@@ -578,8 +670,8 @@ Status CopyBitmap(MemoryPool* pool, const uint8_t* bitmap, int64_t offset, int64
 /// \param[in] offset bit offset into the source data
 /// \param[in] length number of bits to copy
 /// \param[in] dest_offset bit offset into the destination
-/// \param[out] dest the destination buffer, must have at least space for (offset +
-/// length) bits
+/// \param[out] dest the destination buffer, must have at least space for
+/// (offset + length) bits
 ARROW_EXPORT
 void CopyBitmap(const uint8_t* bitmap, int64_t offset, int64_t length, uint8_t* dest,
                 int64_t dest_offset);
 
@@ -590,8 +682,8 @@ void CopyBitmap(const uint8_t* bitmap, int64_t offset, int64_t length, uint8_t*
 /// \param[in] offset bit offset into the source data
 /// \param[in] length number of bits to copy
 /// \param[in] dest_offset bit offset into the destination
-/// \param[out] dest the destination buffer, must have at least space for (offset +
-/// length) bits
+/// \param[out] dest the destination buffer, must have at least space for
+/// (offset + length) bits
 ARROW_EXPORT
 void InvertBitmap(const uint8_t* bitmap, int64_t offset, int64_t length, uint8_t* dest,
                   int64_t dest_offset);
 
@@ -613,7 +705,8 @@ Status InvertBitmap(MemoryPool* pool, const uint8_t* bitmap, int64_t offset,
 ///
 /// \param[in] data a packed LSB-ordered bitmap as a byte array
 /// \param[in] bit_offset a bitwise offset into the bitmap
-/// \param[in] length the number of bits to inspect in the bitmap relative to the offset
+/// \param[in] length the number of bits to inspect in the bitmap relative to
+/// the offset
 ///
 /// \return The number of set (1) bits in the range
 ARROW_EXPORT
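The first/last byte masks in SetBitsTo are easiest to verify on a concrete range (editorial sketch, not part of the patch):

#include <cassert>
#include <cstdint>

#include "arrow/util/bit-util.h"

int main() {
  // Setting bits 2 through 10 of a zeroed bitmap touches two bytes: the
  // first byte keeps its two low bits (first_byte_mask) and gets 0xFC,
  // the second byte keeps everything above bit 3 and gets 0x07.
  uint8_t bitmap[2] = {0x00, 0x00};
  arrow::BitUtil::SetBitsTo(bitmap, 2, 9, true);
  assert(bitmap[0] == 0xFC);  // 0b11111100
  assert(bitmap[1] == 0x07);  // 0b00000111
  return 0;
}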
diff --git a/cpp/src/arrow/util/compression-test.cc b/cpp/src/arrow/util/compression-test.cc
index e0e6f4837f201..22bec001bfd45 100644
--- a/cpp/src/arrow/util/compression-test.cc
+++ b/cpp/src/arrow/util/compression-test.cc
@@ -448,17 +448,22 @@ TEST_P(CodecTest, StreamingRoundtrip) {
 
 INSTANTIATE_TEST_CASE_P(TestGZip, CodecTest, ::testing::Values(Compression::GZIP));
 
-INSTANTIATE_TEST_CASE_P(TestZSTD, CodecTest, ::testing::Values(Compression::ZSTD));
-
 INSTANTIATE_TEST_CASE_P(TestSnappy, CodecTest, ::testing::Values(Compression::SNAPPY));
 
 INSTANTIATE_TEST_CASE_P(TestLZ4, CodecTest, ::testing::Values(Compression::LZ4));
 
 INSTANTIATE_TEST_CASE_P(TestBrotli, CodecTest, ::testing::Values(Compression::BROTLI));
 
+// bz2 requires a binary installation, there is no ExternalProject
 #if ARROW_WITH_BZ2
 INSTANTIATE_TEST_CASE_P(TestBZ2, CodecTest, ::testing::Values(Compression::BZ2));
 #endif
 
+// The ExternalProject for zstd does not build on CMake < 3.7, so we do not
+// require it here
+#ifdef ARROW_WITH_ZSTD
+INSTANTIATE_TEST_CASE_P(TestZSTD, CodecTest, ::testing::Values(Compression::ZSTD));
+#endif
+
 }  // namespace util
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/compression_brotli.cc b/cpp/src/arrow/util/compression_brotli.cc
index 89d099d6a6067..3d75253e11d9f 100644
--- a/cpp/src/arrow/util/compression_brotli.cc
+++ b/cpp/src/arrow/util/compression_brotli.cc
@@ -81,9 +81,7 @@ class BrotliDecompressor : public Decompressor {
   Status BrotliError(const char* msg) { return Status::IOError(msg); }
 
   Status BrotliError(BrotliDecoderErrorCode code, const char* prefix_msg) {
-    std::stringstream ss;
-    ss << prefix_msg << BrotliDecoderErrorString(code);
-    return Status::IOError(ss.str());
+    return Status::IOError(prefix_msg, BrotliDecoderErrorString(code));
   }
 
   BrotliDecoderState* state_ = nullptr;
diff --git a/cpp/src/arrow/util/compression_lz4.cc b/cpp/src/arrow/util/compression_lz4.cc
index 0acd54d057218..d157ba6176054 100644
--- a/cpp/src/arrow/util/compression_lz4.cc
+++ b/cpp/src/arrow/util/compression_lz4.cc
@@ -18,6 +18,7 @@
 #include "arrow/util/compression_lz4.h"
 
 #include
+#include
 #include
 #include
@@ -30,6 +31,10 @@
 namespace arrow {
 namespace util {
 
+static Status LZ4Error(LZ4F_errorCode_t ret, const char* prefix_msg) {
+  return Status::IOError(prefix_msg, LZ4F_getErrorName(ret));
+}
+
 // ----------------------------------------------------------------------
 // Lz4 decompressor implementation
 
@@ -78,12 +83,6 @@ class LZ4Decompressor : public Decompressor {
   bool IsFinished() override { return finished_; }
 
  protected:
-  Status LZ4Error(LZ4F_errorCode_t ret, const char* prefix_msg) {
-    std::stringstream ss;
-    ss << prefix_msg << LZ4F_getErrorName(ret);
-    return Status::IOError(ss.str());
-  }
-
   LZ4F_dctx* ctx_ = nullptr;
   bool finished_;
 };
@@ -124,12 +123,6 @@ class LZ4Compressor : public Compressor {
                   bool* should_retry) override;
 
  protected:
-  Status LZ4Error(LZ4F_errorCode_t ret, const char* prefix_msg) {
-    std::stringstream ss;
-    ss << prefix_msg << LZ4F_getErrorName(ret);
-    return Status::IOError(ss.str());
-  }
-
  LZ4F_cctx* ctx_ = nullptr;
  LZ4F_preferences_t prefs_;
  bool first_time_;
diff --git a/cpp/src/arrow/util/compression_snappy.cc b/cpp/src/arrow/util/compression_snappy.cc
index 1b483e5855209..058593fe13d4e 100644
--- a/cpp/src/arrow/util/compression_snappy.cc
+++ b/cpp/src/arrow/util/compression_snappy.cc
@@ -57,10 +57,8 @@ Status SnappyCodec::Decompress(int64_t input_len, const uint8_t* input,
     return Status::IOError("Corrupt snappy compressed data.");
   }
   if (output_buffer_len < static_cast<int64_t>(decompressed_size)) {
-    std::stringstream ss;
-    ss << "Output buffer size (" << output_buffer_len << ") must be " << decompressed_size
-       << " or larger.";
-    return Status::Invalid(ss.str());
+    return Status::Invalid("Output buffer size (", output_buffer_len, ") must be ",
+                           decompressed_size, " or larger.");
   }
   if (output_len) {
     *output_len = static_cast<int64_t>(decompressed_size);
diff --git a/cpp/src/arrow/util/compression_zlib.cc b/cpp/src/arrow/util/compression_zlib.cc
index
686dffa640940..736b0ab4f1524 100644 --- a/cpp/src/arrow/util/compression_zlib.cc +++ b/cpp/src/arrow/util/compression_zlib.cc @@ -76,12 +76,16 @@ static int DecompressionWindowBitsForFormat(GZipCodec::Format format) { } } +static Status ZlibErrorPrefix(const char* prefix_msg, const char* msg) { + return Status::IOError(prefix_msg, (msg) ? msg : "(unknown error)"); +} + // ---------------------------------------------------------------------- // gzip decompressor implementation class GZipDecompressor : public Decompressor { public: - GZipDecompressor() : initialized_(false) {} + GZipDecompressor() : initialized_(false), finished_(false) {} ~GZipDecompressor() override { if (initialized_) { @@ -142,14 +146,7 @@ class GZipDecompressor : public Decompressor { protected: Status ZlibError(const char* prefix_msg) { - std::stringstream ss; - ss << prefix_msg; - if (stream_.msg && *stream_.msg) { - ss << stream_.msg; - } else { - ss << "(unknown error)"; - } - return Status::IOError(ss.str()); + return ZlibErrorPrefix(prefix_msg, stream_.msg); } z_stream stream_; @@ -197,14 +194,7 @@ class GZipCompressor : public Compressor { protected: Status ZlibError(const char* prefix_msg) { - std::stringstream ss; - ss << prefix_msg; - if (stream_.msg && *stream_.msg) { - ss << stream_.msg; - } else { - ss << "(unknown error)"; - } - return Status::IOError(ss.str()); + return ZlibErrorPrefix(prefix_msg, stream_.msg); } z_stream stream_; @@ -344,9 +334,7 @@ class GZipCodec::GZipCodecImpl { int window_bits = CompressionWindowBitsForFormat(format_); if ((ret = deflateInit2(&stream_, Z_DEFAULT_COMPRESSION, Z_DEFLATED, window_bits, kGZipDefaultCompressionLevel, Z_DEFAULT_STRATEGY)) != Z_OK) { - std::stringstream ss; - ss << "zlib deflateInit failed: " << std::string(stream_.msg); - return Status::IOError(ss.str()); + return ZlibErrorPrefix("zlib deflateInit failed: ", stream_.msg); } compressor_initialized_ = true; return Status::OK(); @@ -367,9 +355,7 @@ class GZipCodec::GZipCodecImpl { // Initialize to run either deflate or zlib/gzip format int window_bits = DecompressionWindowBitsForFormat(format_); if ((ret = inflateInit2(&stream_, window_bits)) != Z_OK) { - std::stringstream ss; - ss << "zlib inflateInit failed: " << std::string(stream_.msg); - return Status::IOError(ss.str()); + return ZlibErrorPrefix("zlib inflateInit failed: ", stream_.msg); } decompressor_initialized_ = true; return Status::OK(); @@ -401,9 +387,7 @@ class GZipCodec::GZipCodecImpl { // Reset the stream for this block if (inflateReset(&stream_) != Z_OK) { - std::stringstream ss; - ss << "zlib inflateReset failed: " << std::string(stream_.msg); - return Status::IOError(ss.str()); + return ZlibErrorPrefix("zlib inflateReset failed: ", stream_.msg); } int ret = 0; @@ -425,18 +409,13 @@ class GZipCodec::GZipCodecImpl { if (ret == Z_STREAM_END || ret != Z_OK) break; // Failure, buffer was too small - std::stringstream ss; - ss << "Too small a buffer passed to GZipCodec. InputLength=" << input_length - << " OutputLength=" << output_buffer_length; - return Status::IOError(ss.str()); + return Status::IOError("Too small a buffer passed to GZipCodec. 
InputLength=", + input_length, " OutputLength=", output_buffer_length); } // Failure for some other reason if (ret != Z_STREAM_END) { - std::stringstream ss; - ss << "GZipCodec failed: "; - if (stream_.msg != NULL) ss << stream_.msg; - return Status::IOError(ss.str()); + return ZlibErrorPrefix("GZipCodec failed: ", stream_.msg); } if (output_length) { @@ -475,15 +454,12 @@ class GZipCodec::GZipCodecImpl { // small return Status::IOError("zlib deflate failed, output buffer too small"); } - std::stringstream ss; - ss << "zlib deflate failed: " << stream_.msg; - return Status::IOError(ss.str()); + + return ZlibErrorPrefix("zlib deflate failed: ", stream_.msg); } if (deflateReset(&stream_) != Z_OK) { - std::stringstream ss; - ss << "zlib deflateReset failed: " << std::string(stream_.msg); - return Status::IOError(ss.str()); + return ZlibErrorPrefix("zlib deflateReset failed: ", stream_.msg); } // Actual output length diff --git a/cpp/src/arrow/util/compression_zstd.cc b/cpp/src/arrow/util/compression_zstd.cc index 083cae99b9730..de9df8fc9492e 100644 --- a/cpp/src/arrow/util/compression_zstd.cc +++ b/cpp/src/arrow/util/compression_zstd.cc @@ -36,9 +36,7 @@ namespace util { constexpr int kZSTDDefaultCompressionLevel = 1; static Status ZSTDError(size_t ret, const char* prefix_msg) { - std::stringstream ss; - ss << prefix_msg << ZSTD_getErrorName(ret); - return Status::IOError(ss.str()); + return Status::IOError(prefix_msg, ZSTD_getErrorName(ret)); } // ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/date.h b/cpp/src/arrow/util/date.h deleted file mode 100644 index aa7648899b902..0000000000000 --- a/cpp/src/arrow/util/date.h +++ /dev/null @@ -1,6540 +0,0 @@ -// Vendored from https://github.com/HowardHinnant/date/ - -#ifndef DATE_H -#define DATE_H - -// The MIT License (MIT) -// -// Copyright (c) 2015, 2016, 2017 Howard Hinnant -// Copyright (c) 2016 Adrian Colomitchi -// Copyright (c) 2017 Florian Dang -// Copyright (c) 2017 Paul Thompson -// Copyright (c) 2018 Tomasz Kamiński -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. -// -// Our apologies. When the previous paragraph was written, lowercase had not yet -// been invented (that would involve another several millennia of evolution). -// We did not mean to shout. 
diff --git a/cpp/src/arrow/util/date.h b/cpp/src/arrow/util/date.h
deleted file mode 100644
index aa7648899b902..0000000000000
--- a/cpp/src/arrow/util/date.h
+++ /dev/null
@@ -1,6540 +0,0 @@
-// Vendored from https://github.com/HowardHinnant/date/
-
-#ifndef DATE_H
-#define DATE_H
-
-// The MIT License (MIT)
-//
-// Copyright (c) 2015, 2016, 2017 Howard Hinnant
-// Copyright (c) 2016 Adrian Colomitchi
-// Copyright (c) 2017 Florian Dang
-// Copyright (c) 2017 Paul Thompson
-// Copyright (c) 2018 Tomasz Kamiński
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in all
-// copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-// SOFTWARE.
-//
-// Our apologies. When the previous paragraph was written, lowercase had not yet
-// been invented (that would involve another several millennia of evolution).
-// We did not mean to shout.

[The remaining deleted lines of the vendored date.h shown in this section cannot be reconstructed here: the extraction stripped everything inside angle brackets (#include targets, template parameter lists, the types in static_cast expressions and std::chrono aliases). The recoverable structure of the deleted file, in order: the HAS_STRING_VIEW/HAS_VOID_T feature-detection and CONSTDATA/CONSTCD11/CONSTCD14/NOEXCEPT configuration macros; the days, weeks, months, and years duration aliases and the sys_time/sys_days/local_time/local_days time_point aliases; declarations and inline definitions of the calendar types day, month, year, weekday, weekday_indexed, weekday_last, year_month, month_day, month_day_last, month_weekday, month_weekday_last, year_month_day, year_month_day_last, year_month_weekday, and year_month_weekday_last, each with comparison and arithmetic operators, operator/ date composition, and stream insertion; the _d/_y literals and the named month and weekday constants; the detail utilities save_istream/save_ostream, trunc, static_gcd, and no_overflow plus floor/round/ceil fallbacks for pre-C++17 chrono; and the to_days()/from_days() civil-calendar conversions. The deletion of the file continues past the end of this section.]
days{(wdi_.index() - 1) * 7})) - .time_since_epoch(); -} - -CONSTCD11 -inline bool operator==(const year_month_weekday& x, - const year_month_weekday& y) NOEXCEPT { - return x.year() == y.year() && x.month() == y.month() && - x.weekday_indexed() == y.weekday_indexed(); -} - -CONSTCD11 -inline bool operator!=(const year_month_weekday& x, - const year_month_weekday& y) NOEXCEPT { - return !(x == y); -} - -template -inline std::basic_ostream& operator<<( - std::basic_ostream& os, const year_month_weekday& ymwdi) { - return os << ymwdi.year() << '/' << ymwdi.month() << '/' << ymwdi.weekday_indexed(); -} - -template -CONSTCD14 inline year_month_weekday operator+(const year_month_weekday& ymwd, - const months& dm) NOEXCEPT { - return (ymwd.year() / ymwd.month() + dm) / ymwd.weekday_indexed(); -} - -template -CONSTCD14 inline year_month_weekday operator+(const months& dm, - const year_month_weekday& ymwd) NOEXCEPT { - return ymwd + dm; -} - -template -CONSTCD14 inline year_month_weekday operator-(const year_month_weekday& ymwd, - const months& dm) NOEXCEPT { - return ymwd + (-dm); -} - -CONSTCD11 -inline year_month_weekday operator+(const year_month_weekday& ymwd, - const years& dy) NOEXCEPT { - return {ymwd.year() + dy, ymwd.month(), ymwd.weekday_indexed()}; -} - -CONSTCD11 -inline year_month_weekday operator+(const years& dy, - const year_month_weekday& ymwd) NOEXCEPT { - return ymwd + dy; -} - -CONSTCD11 -inline year_month_weekday operator-(const year_month_weekday& ymwd, - const years& dy) NOEXCEPT { - return ymwd + (-dy); -} - -// year_month_weekday_last - -CONSTCD11 -inline year_month_weekday_last::year_month_weekday_last( - const date::year& y, const date::month& m, const date::weekday_last& wdl) NOEXCEPT - : y_(y), - m_(m), - wdl_(wdl) {} - -template -CONSTCD14 inline year_month_weekday_last& year_month_weekday_last::operator+=( - const months& m) NOEXCEPT { - *this = *this + m; - return *this; -} - -template -CONSTCD14 inline year_month_weekday_last& year_month_weekday_last::operator-=( - const months& m) NOEXCEPT { - *this = *this - m; - return *this; -} - -CONSTCD14 -inline year_month_weekday_last& year_month_weekday_last::operator+=( - const years& y) NOEXCEPT { - *this = *this + y; - return *this; -} - -CONSTCD14 -inline year_month_weekday_last& year_month_weekday_last::operator-=( - const years& y) NOEXCEPT { - *this = *this - y; - return *this; -} - -CONSTCD11 inline year year_month_weekday_last::year() const NOEXCEPT { return y_; } -CONSTCD11 inline month year_month_weekday_last::month() const NOEXCEPT { return m_; } - -CONSTCD11 -inline weekday year_month_weekday_last::weekday() const NOEXCEPT { - return wdl_.weekday(); -} - -CONSTCD11 -inline weekday_last year_month_weekday_last::weekday_last() const NOEXCEPT { - return wdl_; -} - -CONSTCD14 -inline year_month_weekday_last::operator sys_days() const NOEXCEPT { - return sys_days{to_days()}; -} - -CONSTCD14 -inline year_month_weekday_last::operator local_days() const NOEXCEPT { - return local_days{to_days()}; -} - -CONSTCD11 -inline bool year_month_weekday_last::ok() const NOEXCEPT { - return y_.ok() && m_.ok() && wdl_.ok(); -} - -CONSTCD14 -inline days year_month_weekday_last::to_days() const NOEXCEPT { - auto const d = sys_days(y_ / m_ / last); - return (d - (date::weekday{d} - wdl_.weekday())).time_since_epoch(); -} - -CONSTCD11 -inline bool operator==(const year_month_weekday_last& x, - const year_month_weekday_last& y) NOEXCEPT { - return x.year() == y.year() && x.month() == y.month() && - x.weekday_last() == 
y.weekday_last(); -} - -CONSTCD11 -inline bool operator!=(const year_month_weekday_last& x, - const year_month_weekday_last& y) NOEXCEPT { - return !(x == y); -} - -template -inline std::basic_ostream& operator<<( - std::basic_ostream& os, const year_month_weekday_last& ymwdl) { - return os << ymwdl.year() << '/' << ymwdl.month() << '/' << ymwdl.weekday_last(); -} - -template -CONSTCD14 inline year_month_weekday_last operator+(const year_month_weekday_last& ymwdl, - const months& dm) NOEXCEPT { - return (ymwdl.year() / ymwdl.month() + dm) / ymwdl.weekday_last(); -} - -template -CONSTCD14 inline year_month_weekday_last operator+( - const months& dm, const year_month_weekday_last& ymwdl) NOEXCEPT { - return ymwdl + dm; -} - -template -CONSTCD14 inline year_month_weekday_last operator-(const year_month_weekday_last& ymwdl, - const months& dm) NOEXCEPT { - return ymwdl + (-dm); -} - -CONSTCD11 -inline year_month_weekday_last operator+(const year_month_weekday_last& ymwdl, - const years& dy) NOEXCEPT { - return {ymwdl.year() + dy, ymwdl.month(), ymwdl.weekday_last()}; -} - -CONSTCD11 -inline year_month_weekday_last operator+(const years& dy, - const year_month_weekday_last& ymwdl) NOEXCEPT { - return ymwdl + dy; -} - -CONSTCD11 -inline year_month_weekday_last operator-(const year_month_weekday_last& ymwdl, - const years& dy) NOEXCEPT { - return ymwdl + (-dy); -} - -// year_month from operator/() - -CONSTCD11 -inline year_month operator/(const year& y, const month& m) NOEXCEPT { return {y, m}; } - -CONSTCD11 -inline year_month operator/(const year& y, int m) NOEXCEPT { - return y / month(static_cast(m)); -} - -// month_day from operator/() - -CONSTCD11 -inline month_day operator/(const month& m, const day& d) NOEXCEPT { return {m, d}; } - -CONSTCD11 -inline month_day operator/(const day& d, const month& m) NOEXCEPT { return m / d; } - -CONSTCD11 -inline month_day operator/(const month& m, int d) NOEXCEPT { - return m / day(static_cast(d)); -} - -CONSTCD11 -inline month_day operator/(int m, const day& d) NOEXCEPT { - return month(static_cast(m)) / d; -} - -CONSTCD11 inline month_day operator/(const day& d, int m) NOEXCEPT { return m / d; } - -// month_day_last from operator/() - -CONSTCD11 -inline month_day_last operator/(const month& m, last_spec) NOEXCEPT { - return month_day_last{m}; -} - -CONSTCD11 -inline month_day_last operator/(last_spec, const month& m) NOEXCEPT { return m / last; } - -CONSTCD11 -inline month_day_last operator/(int m, last_spec) NOEXCEPT { - return month(static_cast(m)) / last; -} - -CONSTCD11 -inline month_day_last operator/(last_spec, int m) NOEXCEPT { return m / last; } - -// month_weekday from operator/() - -CONSTCD11 -inline month_weekday operator/(const month& m, const weekday_indexed& wdi) NOEXCEPT { - return {m, wdi}; -} - -CONSTCD11 -inline month_weekday operator/(const weekday_indexed& wdi, const month& m) NOEXCEPT { - return m / wdi; -} - -CONSTCD11 -inline month_weekday operator/(int m, const weekday_indexed& wdi) NOEXCEPT { - return month(static_cast(m)) / wdi; -} - -CONSTCD11 -inline month_weekday operator/(const weekday_indexed& wdi, int m) NOEXCEPT { - return m / wdi; -} - -// month_weekday_last from operator/() - -CONSTCD11 -inline month_weekday_last operator/(const month& m, const weekday_last& wdl) NOEXCEPT { - return {m, wdl}; -} - -CONSTCD11 -inline month_weekday_last operator/(const weekday_last& wdl, const month& m) NOEXCEPT { - return m / wdl; -} - -CONSTCD11 -inline month_weekday_last operator/(int m, const weekday_last& wdl) NOEXCEPT { - return 
month(static_cast(m)) / wdl; -} - -CONSTCD11 -inline month_weekday_last operator/(const weekday_last& wdl, int m) NOEXCEPT { - return m / wdl; -} - -// year_month_day from operator/() - -CONSTCD11 -inline year_month_day operator/(const year_month& ym, const day& d) NOEXCEPT { - return {ym.year(), ym.month(), d}; -} - -CONSTCD11 -inline year_month_day operator/(const year_month& ym, int d) NOEXCEPT { - return ym / day(static_cast(d)); -} - -CONSTCD11 -inline year_month_day operator/(const year& y, const month_day& md) NOEXCEPT { - return y / md.month() / md.day(); -} - -CONSTCD11 -inline year_month_day operator/(int y, const month_day& md) NOEXCEPT { - return year(y) / md; -} - -CONSTCD11 -inline year_month_day operator/(const month_day& md, const year& y) NOEXCEPT { - return y / md; -} - -CONSTCD11 -inline year_month_day operator/(const month_day& md, int y) NOEXCEPT { - return year(y) / md; -} - -// year_month_day_last from operator/() - -CONSTCD11 -inline year_month_day_last operator/(const year_month& ym, last_spec) NOEXCEPT { - return {ym.year(), month_day_last{ym.month()}}; -} - -CONSTCD11 -inline year_month_day_last operator/(const year& y, const month_day_last& mdl) NOEXCEPT { - return {y, mdl}; -} - -CONSTCD11 -inline year_month_day_last operator/(int y, const month_day_last& mdl) NOEXCEPT { - return year(y) / mdl; -} - -CONSTCD11 -inline year_month_day_last operator/(const month_day_last& mdl, const year& y) NOEXCEPT { - return y / mdl; -} - -CONSTCD11 -inline year_month_day_last operator/(const month_day_last& mdl, int y) NOEXCEPT { - return year(y) / mdl; -} - -// year_month_weekday from operator/() - -CONSTCD11 -inline year_month_weekday operator/(const year_month& ym, - const weekday_indexed& wdi) NOEXCEPT { - return {ym.year(), ym.month(), wdi}; -} - -CONSTCD11 -inline year_month_weekday operator/(const year& y, const month_weekday& mwd) NOEXCEPT { - return {y, mwd.month(), mwd.weekday_indexed()}; -} - -CONSTCD11 -inline year_month_weekday operator/(int y, const month_weekday& mwd) NOEXCEPT { - return year(y) / mwd; -} - -CONSTCD11 -inline year_month_weekday operator/(const month_weekday& mwd, const year& y) NOEXCEPT { - return y / mwd; -} - -CONSTCD11 -inline year_month_weekday operator/(const month_weekday& mwd, int y) NOEXCEPT { - return year(y) / mwd; -} - -// year_month_weekday_last from operator/() - -CONSTCD11 -inline year_month_weekday_last operator/(const year_month& ym, - const weekday_last& wdl) NOEXCEPT { - return {ym.year(), ym.month(), wdl}; -} - -CONSTCD11 -inline year_month_weekday_last operator/(const year& y, - const month_weekday_last& mwdl) NOEXCEPT { - return {y, mwdl.month(), mwdl.weekday_last()}; -} - -CONSTCD11 -inline year_month_weekday_last operator/(int y, const month_weekday_last& mwdl) NOEXCEPT { - return year(y) / mwdl; -} - -CONSTCD11 -inline year_month_weekday_last operator/(const month_weekday_last& mwdl, - const year& y) NOEXCEPT { - return y / mwdl; -} - -CONSTCD11 -inline year_month_weekday_last operator/(const month_weekday_last& mwdl, int y) NOEXCEPT { - return year(y) / mwdl; -} - -template -struct fields; - -template -std::basic_ostream& to_stream( - std::basic_ostream& os, const CharT* fmt, const fields& fds, - const std::string* abbrev = nullptr, - const std::chrono::seconds* offset_sec = nullptr); - -template -std::basic_istream& from_stream( - std::basic_istream& is, const CharT* fmt, fields& fds, - std::basic_string* abbrev = nullptr, - std::chrono::minutes* offset = nullptr); - -// time_of_day - -enum { am = 1, pm }; - -namespace 
detail { - -// width::value is the number of fractional decimal digits in 1/n -// width<0>::value and width<1>::value are defined to be 0 -// If 1/n takes more than 18 fractional decimal digits, -// the result is truncated to 19. -// Example: width<2>::value == 1 -// Example: width<3>::value == 19 -// Example: width<4>::value == 2 -// Example: width<10>::value == 1 -// Example: width<1000>::value == 3 -template -struct width { - static CONSTDATA unsigned value = 1 + width::value; -}; - -template -struct width { - static CONSTDATA unsigned value = 0; -}; - -template -struct static_pow10 { - private: - static CONSTDATA std::uint64_t h = static_pow10::value; - - public: - static CONSTDATA std::uint64_t value = h * h * (exp % 2 ? 10 : 1); -}; - -template <> -struct static_pow10<0> { - static CONSTDATA std::uint64_t value = 1; -}; - -template -struct make_precision { - using type = std::chrono::duration::value>>; - static CONSTDATA unsigned width = w; -}; - -template -struct make_precision { - using type = std::chrono::duration; - static CONSTDATA unsigned width = 6; -}; - -template ::type::period::den>::value> -class decimal_format_seconds { - public: - using rep = typename std::common_type::type::rep; - using precision = typename make_precision::type; - static auto CONSTDATA width = make_precision::width; - - private: - std::chrono::seconds s_; - precision sub_s_; - - public: - CONSTCD11 decimal_format_seconds() : s_(), sub_s_() {} - - CONSTCD11 explicit decimal_format_seconds(const Duration& d) NOEXCEPT - : s_(std::chrono::duration_cast(d)), - sub_s_(std::chrono::duration_cast(d - s_)) {} - - CONSTCD14 std::chrono::seconds& seconds() NOEXCEPT { return s_; } - CONSTCD11 std::chrono::seconds seconds() const NOEXCEPT { return s_; } - CONSTCD11 precision subseconds() const NOEXCEPT { return sub_s_; } - - CONSTCD14 precision to_duration() const NOEXCEPT { return s_ + sub_s_; } - - CONSTCD11 bool in_conventional_range() const NOEXCEPT { - return sub_s_ < std::chrono::seconds{1} && s_ < std::chrono::minutes{1}; - } - - template - friend std::basic_ostream& operator<<( - std::basic_ostream& os, const decimal_format_seconds& x) { - date::detail::save_ostream _(os); - os.fill('0'); - os.flags(std::ios::dec | std::ios::right); - os.width(2); - os << x.s_.count() - << std::use_facet>(os.getloc()).decimal_point(); - os.width(width); - os << static_cast(x.sub_s_.count()); - return os; - } -}; - -template -class decimal_format_seconds { - static CONSTDATA unsigned w = 0; - - public: - using rep = typename std::common_type::type::rep; - using precision = std::chrono::duration; - static auto CONSTDATA width = make_precision::width; - - private: - std::chrono::seconds s_; - - public: - CONSTCD11 decimal_format_seconds() : s_() {} - CONSTCD11 explicit decimal_format_seconds(const precision& s) NOEXCEPT : s_(s) {} - - CONSTCD14 std::chrono::seconds& seconds() NOEXCEPT { return s_; } - CONSTCD11 std::chrono::seconds seconds() const NOEXCEPT { return s_; } - CONSTCD14 precision to_duration() const NOEXCEPT { return s_; } - - CONSTCD11 bool in_conventional_range() const NOEXCEPT { - return s_ < std::chrono::minutes{1}; - } - - template - friend std::basic_ostream& operator<<( - std::basic_ostream& os, const decimal_format_seconds& x) { - date::detail::save_ostream _(os); - os.fill('0'); - os.flags(std::ios::dec | std::ios::right); - os.width(2); - os << x.s_.count(); - return os; - } -}; - -enum class classify { not_valid, hour, minute, second, subsecond }; - -template -struct classify_duration { - static 
CONSTDATA classify value = - std::is_convertible::value - ? classify::hour - : std::is_convertible::value - ? classify::minute - : std::is_convertible::value - ? classify::second - : std::chrono::treat_as_floating_point< - typename Duration::rep>::value - ? classify::not_valid - : classify::subsecond; -}; - -template -inline CONSTCD11 typename std::enable_if::is_signed, - std::chrono::duration>::type -abs(std::chrono::duration d) { - return d >= d.zero() ? +d : -d; -} - -template -inline CONSTCD11 typename std::enable_if::is_signed, - std::chrono::duration>::type -abs(std::chrono::duration d) { - return d; -} - -class time_of_day_base { - protected: - std::chrono::hours h_; - unsigned char mode_; - bool neg_; - - enum { is24hr }; - - CONSTCD11 time_of_day_base() NOEXCEPT : h_(0), - mode_(static_cast(is24hr)), - neg_(false) {} - - CONSTCD11 time_of_day_base(std::chrono::hours h, bool neg, unsigned m) NOEXCEPT - : h_(detail::abs(h)), - mode_(static_cast(m)), - neg_(neg) {} - - CONSTCD14 void make24() NOEXCEPT; - CONSTCD14 void make12() NOEXCEPT; - - CONSTCD14 std::chrono::hours to24hr() const; - - CONSTCD11 bool in_conventional_range() const NOEXCEPT { return !neg_ && h_ < days{1}; } -}; - -CONSTCD14 -inline std::chrono::hours time_of_day_base::to24hr() const { - auto h = h_; - if (mode_ == am || mode_ == pm) { - CONSTDATA auto h12 = std::chrono::hours(12); - if (mode_ == pm) { - if (h != h12) h = h + h12; - } else if (h == h12) { - h = std::chrono::hours(0); - } - } - return h; -} - -CONSTCD14 -inline void time_of_day_base::make24() NOEXCEPT { - h_ = to24hr(); - mode_ = is24hr; -} - -CONSTCD14 -inline void time_of_day_base::make12() NOEXCEPT { - if (mode_ == is24hr) { - CONSTDATA auto h12 = std::chrono::hours(12); - if (h_ >= h12) { - if (h_ > h12) h_ = h_ - h12; - mode_ = pm; - } else { - if (h_ == std::chrono::hours(0)) h_ = h12; - mode_ = am; - } - } -} - -template ::value> -class time_of_day_storage; - -template -class time_of_day_storage, detail::classify::hour> - : private detail::time_of_day_base { - using base = detail::time_of_day_base; - - public: - using precision = std::chrono::hours; - -#if !defined(_MSC_VER) || _MSC_VER >= 1900 - CONSTCD11 time_of_day_storage() NOEXCEPT = default; -#else - CONSTCD11 time_of_day_storage() = default; -#endif /* !defined(_MSC_VER) || _MSC_VER >= 1900 */ - - CONSTCD11 explicit time_of_day_storage(std::chrono::hours since_midnight) NOEXCEPT - : base(since_midnight, since_midnight < std::chrono::hours{0}, is24hr) {} - - CONSTCD11 explicit time_of_day_storage(std::chrono::hours h, unsigned md) NOEXCEPT - : base(h, h < std::chrono::hours{0}, md) {} - - CONSTCD11 std::chrono::hours hours() const NOEXCEPT { return h_; } - CONSTCD11 unsigned mode() const NOEXCEPT { return mode_; } - - CONSTCD14 explicit operator precision() const NOEXCEPT { - auto p = to24hr(); - if (neg_) p = -p; - return p; - } - - CONSTCD14 precision to_duration() const NOEXCEPT { - return static_cast(*this); - } - - CONSTCD14 time_of_day_storage& make24() NOEXCEPT { - base::make24(); - return *this; - } - CONSTCD14 time_of_day_storage& make12() NOEXCEPT { - base::make12(); - return *this; - } - - CONSTCD11 bool in_conventional_range() const NOEXCEPT { - return base::in_conventional_range(); - } - - template - friend std::basic_ostream& operator<<( - std::basic_ostream& os, const time_of_day_storage& t) { - detail::save_ostream _(os); - if (t.neg_) os << '-'; - os.fill('0'); - os.flags(std::ios::dec | std::ios::right); - if (t.mode_ != am && t.mode_ != pm) os.width(2); - os << 
t.h_.count(); - switch (t.mode_) { - case time_of_day_storage::is24hr: - os << "00"; - break; - case am: - os << "am"; - break; - case pm: - os << "pm"; - break; - } - return os; - } -}; - -template -class time_of_day_storage, detail::classify::minute> - : private detail::time_of_day_base { - using base = detail::time_of_day_base; - - std::chrono::minutes m_; - - public: - using precision = std::chrono::minutes; - - CONSTCD11 time_of_day_storage() NOEXCEPT : base(), m_(0) {} - - CONSTCD11 explicit time_of_day_storage(std::chrono::minutes since_midnight) NOEXCEPT - : base(std::chrono::duration_cast(since_midnight), - since_midnight < std::chrono::minutes{0}, is24hr), - m_(detail::abs(since_midnight) - h_) {} - - CONSTCD11 explicit time_of_day_storage(std::chrono::hours h, std::chrono::minutes m, - unsigned md) NOEXCEPT : base(h, false, md), - m_(m) {} - - CONSTCD11 std::chrono::hours hours() const NOEXCEPT { return h_; } - CONSTCD11 std::chrono::minutes minutes() const NOEXCEPT { return m_; } - CONSTCD11 unsigned mode() const NOEXCEPT { return mode_; } - - CONSTCD14 explicit operator precision() const NOEXCEPT { - auto p = to24hr() + m_; - if (neg_) p = -p; - return p; - } - - CONSTCD14 precision to_duration() const NOEXCEPT { - return static_cast(*this); - } - - CONSTCD14 time_of_day_storage& make24() NOEXCEPT { - base::make24(); - return *this; - } - CONSTCD14 time_of_day_storage& make12() NOEXCEPT { - base::make12(); - return *this; - } - - CONSTCD11 bool in_conventional_range() const NOEXCEPT { - return base::in_conventional_range() && m_ < std::chrono::hours{1}; - } - - template - friend std::basic_ostream& operator<<( - std::basic_ostream& os, const time_of_day_storage& t) { - detail::save_ostream _(os); - if (t.neg_) os << '-'; - os.fill('0'); - os.flags(std::ios::dec | std::ios::right); - if (t.mode_ != am && t.mode_ != pm) os.width(2); - os << t.h_.count() << ':'; - os.width(2); - os << t.m_.count(); - switch (t.mode_) { - case am: - os << "am"; - break; - case pm: - os << "pm"; - break; - } - return os; - } -}; - -template -class time_of_day_storage, detail::classify::second> - : private detail::time_of_day_base { - using base = detail::time_of_day_base; - using dfs = decimal_format_seconds; - - std::chrono::minutes m_; - dfs s_; - - public: - using precision = std::chrono::seconds; - - CONSTCD11 time_of_day_storage() NOEXCEPT : base(), m_(0), s_() {} - - CONSTCD11 explicit time_of_day_storage(std::chrono::seconds since_midnight) NOEXCEPT - : base(std::chrono::duration_cast(since_midnight), - since_midnight < std::chrono::seconds{0}, is24hr), - m_(std::chrono::duration_cast(detail::abs(since_midnight) - - h_)), - s_(detail::abs(since_midnight) - h_ - m_) {} - - CONSTCD11 explicit time_of_day_storage(std::chrono::hours h, std::chrono::minutes m, - std::chrono::seconds s, unsigned md) NOEXCEPT - : base(h, false, md), - m_(m), - s_(s) {} - - CONSTCD11 std::chrono::hours hours() const NOEXCEPT { return h_; } - CONSTCD11 std::chrono::minutes minutes() const NOEXCEPT { return m_; } - CONSTCD14 std::chrono::seconds& seconds() NOEXCEPT { return s_.seconds(); } - CONSTCD11 std::chrono::seconds seconds() const NOEXCEPT { return s_.seconds(); } - CONSTCD11 unsigned mode() const NOEXCEPT { return mode_; } - - CONSTCD14 explicit operator precision() const NOEXCEPT { - auto p = to24hr() + s_.to_duration() + m_; - if (neg_) p = -p; - return p; - } - - CONSTCD14 precision to_duration() const NOEXCEPT { - return static_cast(*this); - } - - CONSTCD14 time_of_day_storage& make24() NOEXCEPT { - 
base::make24(); - return *this; - } - CONSTCD14 time_of_day_storage& make12() NOEXCEPT { - base::make12(); - return *this; - } - - CONSTCD11 bool in_conventional_range() const NOEXCEPT { - return base::in_conventional_range() && m_ < std::chrono::hours{1} && - s_.in_conventional_range(); - } - - template - friend std::basic_ostream& operator<<( - std::basic_ostream& os, const time_of_day_storage& t) { - detail::save_ostream _(os); - if (t.neg_) os << '-'; - os.fill('0'); - os.flags(std::ios::dec | std::ios::right); - if (t.mode_ != am && t.mode_ != pm) os.width(2); - os << t.h_.count() << ':'; - os.width(2); - os << t.m_.count() << ':' << t.s_; - switch (t.mode_) { - case am: - os << "am"; - break; - case pm: - os << "pm"; - break; - } - return os; - } - - template - friend std::basic_ostream& date::to_stream( - std::basic_ostream& os, const CharT* fmt, - const fields& fds, const std::string* abbrev, - const std::chrono::seconds* offset_sec); - - template - friend std::basic_istream& date::from_stream( - std::basic_istream& is, const CharT* fmt, fields& fds, - std::basic_string* abbrev, std::chrono::minutes* offset); -}; - -template -class time_of_day_storage, detail::classify::subsecond> - : private detail::time_of_day_base { - public: - using Duration = std::chrono::duration; - using dfs = decimal_format_seconds< - typename std::common_type::type>; - using precision = typename dfs::precision; - - private: - using base = detail::time_of_day_base; - - std::chrono::minutes m_; - dfs s_; - - public: - CONSTCD11 time_of_day_storage() NOEXCEPT : base(), m_(0), s_() {} - - CONSTCD11 explicit time_of_day_storage(Duration since_midnight) NOEXCEPT - : base(date::trunc(since_midnight), - since_midnight < Duration{0}, is24hr), - m_(date::trunc(detail::abs(since_midnight) - h_)), - s_(detail::abs(since_midnight) - h_ - m_) {} - - CONSTCD11 explicit time_of_day_storage(std::chrono::hours h, std::chrono::minutes m, - std::chrono::seconds s, precision sub_s, - unsigned md) NOEXCEPT : base(h, false, md), - m_(m), - s_(s + sub_s) {} - - CONSTCD11 std::chrono::hours hours() const NOEXCEPT { return h_; } - CONSTCD11 std::chrono::minutes minutes() const NOEXCEPT { return m_; } - CONSTCD14 std::chrono::seconds& seconds() NOEXCEPT { return s_.seconds(); } - CONSTCD11 std::chrono::seconds seconds() const NOEXCEPT { return s_.seconds(); } - CONSTCD11 precision subseconds() const NOEXCEPT { return s_.subseconds(); } - CONSTCD11 unsigned mode() const NOEXCEPT { return mode_; } - - CONSTCD14 explicit operator precision() const NOEXCEPT { - auto p = to24hr() + s_.to_duration() + m_; - if (neg_) p = -p; - return p; - } - - CONSTCD14 precision to_duration() const NOEXCEPT { - return static_cast(*this); - } - - CONSTCD14 time_of_day_storage& make24() NOEXCEPT { - base::make24(); - return *this; - } - CONSTCD14 time_of_day_storage& make12() NOEXCEPT { - base::make12(); - return *this; - } - - CONSTCD11 bool in_conventional_range() const NOEXCEPT { - return base::in_conventional_range() && m_ < std::chrono::hours{1} && - s_.in_conventional_range(); - } - - template - friend std::basic_ostream& operator<<( - std::basic_ostream& os, const time_of_day_storage& t) { - detail::save_ostream _(os); - if (t.neg_) os << '-'; - os.fill('0'); - os.flags(std::ios::dec | std::ios::right); - if (t.mode_ != am && t.mode_ != pm) os.width(2); - os << t.h_.count() << ':'; - os.width(2); - os << t.m_.count() << ':' << t.s_; - switch (t.mode_) { - case am: - os << "am"; - break; - case pm: - os << "pm"; - break; - } - return os; - } - - 
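// [Editor's note] The time_of_day_storage specializations above (hour, minute,
// second, subsecond) are selected by detail::classify_duration, and
// make12()/make24() switch the same stored value between 12-hour and 24-hour
// presentation. A minimal usage sketch (editor's illustration, relying only on
// the public make_time() API defined later in this header):
//
//   #include <chrono>
//   #include <iostream>
//   // #include "date.h"
//   int main() {
//     auto tod = date::make_time(std::chrono::hours{13} + std::chrono::minutes{7} +
//                                std::chrono::seconds{5});
//     std::cout << tod << '\n';           // "13:07:05" (default 24-hour mode)
//     std::cout << tod.make12() << '\n';  // "1:07:05pm"
//   }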
template - friend std::basic_ostream& date::to_stream( - std::basic_ostream& os, const CharT* fmt, - const fields& fds, const std::string* abbrev, - const std::chrono::seconds* offset_sec); - - template - friend std::basic_istream& date::from_stream( - std::basic_istream& is, const CharT* fmt, fields& fds, - std::basic_string* abbrev, std::chrono::minutes* offset); -}; - -} // namespace detail - -template -class time_of_day : public detail::time_of_day_storage { - using base = detail::time_of_day_storage; - - public: -#if !defined(_MSC_VER) || _MSC_VER >= 1900 - CONSTCD11 time_of_day() NOEXCEPT = default; -#else - CONSTCD11 time_of_day() = default; -#endif /* !defined(_MSC_VER) || _MSC_VER >= 1900 */ - - CONSTCD11 explicit time_of_day(Duration since_midnight) NOEXCEPT - : base(since_midnight) {} - - template - CONSTCD11 explicit time_of_day(Arg0&& arg0, Arg1&& arg1, Args&&... args) NOEXCEPT - : base(std::forward(arg0), std::forward(arg1), - std::forward(args)...) {} -}; - -template ::value>::type> -CONSTCD11 inline time_of_day> make_time( - const std::chrono::duration& d) { - return time_of_day>(d); -} - -CONSTCD11 -inline time_of_day make_time(const std::chrono::hours& h, - unsigned md) { - return time_of_day(h, md); -} - -CONSTCD11 -inline time_of_day make_time(const std::chrono::hours& h, - const std::chrono::minutes& m, - unsigned md) { - return time_of_day(h, m, md); -} - -CONSTCD11 -inline time_of_day make_time(const std::chrono::hours& h, - const std::chrono::minutes& m, - const std::chrono::seconds& s, - unsigned md) { - return time_of_day(h, m, s, md); -} - -template < - class Rep, class Period, - class = typename std::enable_if>::value>::type> -CONSTCD11 inline time_of_day> make_time( - const std::chrono::hours& h, const std::chrono::minutes& m, - const std::chrono::seconds& s, const std::chrono::duration& sub_s, - unsigned md) { - return time_of_day>(h, m, s, sub_s, md); -} - -template -inline typename std::enable_if< - !std::chrono::treat_as_floating_point::value && - std::ratio_less::value, - std::basic_ostream&>::type -operator<<(std::basic_ostream& os, const sys_time& tp) { - auto const dp = date::floor(tp); - return os << year_month_day(dp) << ' ' << make_time(tp - dp); -} - -template -inline std::basic_ostream& operator<<( - std::basic_ostream& os, const sys_days& dp) { - return os << year_month_day(dp); -} - -template -inline std::basic_ostream& operator<<( - std::basic_ostream& os, const local_time& ut) { - return (os << sys_time{ut.time_since_epoch()}); -} - -// to_stream - -CONSTDATA year nanyear{-32768}; - -template -struct fields { - year_month_day ymd{nanyear / 0 / 0}; - weekday wd{8u}; - time_of_day tod{}; - bool has_tod = false; - - fields() = default; - - explicit fields(year_month_day ymd_) : ymd(ymd_) {} - explicit fields(weekday wd_) : wd(wd_) {} - explicit fields(time_of_day tod_) : tod(tod_), has_tod(true) {} - - fields(year_month_day ymd_, weekday wd_) : ymd(ymd_), wd(wd_) {} - fields(year_month_day ymd_, time_of_day tod_) - : ymd(ymd_), tod(tod_), has_tod(true) {} - - fields(weekday wd_, time_of_day tod_) : wd(wd_), tod(tod_), has_tod(true) {} - - fields(year_month_day ymd_, weekday wd_, time_of_day tod_) - : ymd(ymd_), wd(wd_), tod(tod_), has_tod(true) {} -}; - -namespace detail { - -template -unsigned extract_weekday(std::basic_ostream& os, - const fields& fds) { - if (!fds.ymd.ok() && !fds.wd.ok()) { - // fds does not contain a valid weekday - os.setstate(std::ios::failbit); - return 8; - } - weekday wd; - if (fds.ymd.ok()) { - wd = 
weekday{sys_days(fds.ymd)}; - if (fds.wd.ok() && wd != fds.wd) { - // fds.ymd and fds.wd are inconsistent - os.setstate(std::ios::failbit); - return 8; - } - } else { - wd = fds.wd; - } - return static_cast((wd - Sunday).count()); -} - -template -unsigned extract_month(std::basic_ostream& os, - const fields& fds) { - if (!fds.ymd.month().ok()) { - // fds does not contain a valid month - os.setstate(std::ios::failbit); - return 0; - } - return static_cast(fds.ymd.month()); -} - -} // namespace detail - -#if ONLY_C_LOCALE - -namespace detail { - -inline std::pair weekday_names() { - static const string nm[] = {"Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", - "Friday", "Saturday", "Sun", "Mon", "Tue", - "Wed", "Thu", "Fri", "Sat"}; - return make_pair(nm, nm + sizeof(nm) / sizeof(nm[0])); -} - -inline std::pair month_names() { - static const string nm[] = { - "January", "February", "March", "April", "May", "June", "July", "August", - "September", "October", "November", "December", "Jan", "Feb", "Mar", "Apr", - "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; - return make_pair(nm, nm + sizeof(nm) / sizeof(nm[0])); -} - -inline std::pair ampm_names() { - static const string nm[] = {"AM", "PM"}; - return make_pair(nm, nm + sizeof(nm) / sizeof(nm[0])); -} - -template -FwdIter scan_keyword(std::basic_istream& is, FwdIter kb, FwdIter ke) { - size_t nkw = static_cast(std::distance(kb, ke)); - const unsigned char doesnt_match = '\0'; - const unsigned char might_match = '\1'; - const unsigned char does_match = '\2'; - unsigned char statbuf[100]; - unsigned char* status = statbuf; - unique_ptr stat_hold(0, free); - if (nkw > sizeof(statbuf)) { - status = (unsigned char*)malloc(nkw); - if (status == nullptr) throw bad_alloc(); - stat_hold.reset(status); - } - size_t n_might_match = nkw; // At this point, any keyword might match - size_t n_does_match = 0; // but none of them definitely do - // Initialize all statuses to might_match, except for "" keywords are does_match - unsigned char* st = status; - for (auto ky = kb; ky != ke; ++ky, ++st) { - if (!ky->empty()) { - *st = might_match; - } else { - *st = does_match; - --n_might_match; - ++n_does_match; - } - } - // While there might be a match, test keywords against the next CharT - for (size_t indx = 0; is && n_might_match > 0; ++indx) { - // Peek at the next CharT but don't consume it - auto ic = is.peek(); - if (ic == EOF) { - is.setstate(ios::eofbit); - break; - } - auto c = static_cast(toupper(ic)); - bool consume = false; - // For each keyword which might match, see if the indx character is c - // If a match if found, consume c - // If a match is found, and that is the last character in the keyword, - // then that keyword matches. - // If the keyword doesn't match this character, then change the keyword - // to doesn't match - st = status; - for (auto ky = kb; ky != ke; ++ky, ++st) { - if (*st == might_match) { - if (c == static_cast(toupper((*ky)[indx]))) { - consume = true; - if (ky->size() == indx + 1) { - *st = does_match; - --n_might_match; - ++n_does_match; - } - } else { - *st = doesnt_match; - --n_might_match; - } - } - } - // consume if we matched a character - if (consume) { - (void)is.get(); - // If we consumed a character and there might be a matched keyword that - // was marked matched on a previous iteration, then such keywords - // are now marked as not matching. 
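// [Editor's note] Worked example of this keyword scan (an illustration, not
// vendored code): matching the stream "Janu..." against the table
// {"Jan", "January"} proceeds as
//   indx 0-1: both keywords are might_match;
//   indx 2 ('n'): "Jan" is fully consumed and becomes does_match, while
//                 "January" remains might_match;
//   indx 3 ('u'): "January" still might_match, so the branch below demotes the
//                 shorter "Jan" (does_match, but size != indx + 1) back to
//                 doesnt_match.
// The longest keyword consistent with the input therefore wins.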
- if (n_might_match + n_does_match > 1) { - st = status; - for (auto ky = kb; ky != ke; ++ky, ++st) { - if (*st == does_match && ky->size() != indx + 1) { - *st = doesnt_match; - --n_does_match; - } - } - } - } - } - // We've exited the loop because we hit eof and/or we have no more "might matches". - // Return the first matching result - for (st = status; kb != ke; ++kb, ++st) - if (*st == does_match) break; - if (kb == ke) is.setstate(ios_base::failbit); - return kb; -} - -} // namespace detail - -#endif // ONLY_C_LOCALE - -template -std::basic_ostream& to_stream(std::basic_ostream& os, - const CharT* fmt, - const fields& fds, - const std::string* abbrev, - const std::chrono::seconds* offset_sec) { - using detail::save_ostream; - using std::ios; - using std::time_put; - using std::use_facet; - using std::chrono::duration; - using std::chrono::duration_cast; - using std::chrono::hours; - using std::chrono::minutes; - date::detail::save_ostream ss(os); - os.fill(' '); - os.flags(std::ios::skipws | std::ios::dec); - os.width(0); - tm tm{}; - bool insert_negative = fds.has_tod && fds.tod.to_duration() < Duration::zero(); -#if !ONLY_C_LOCALE - auto& facet = use_facet>(os.getloc()); -#endif - const CharT* command = nullptr; - CharT modified = CharT{}; - for (; *fmt; ++fmt) { - switch (*fmt) { - case 'a': - case 'A': - if (command) { - if (modified == CharT{}) { - tm.tm_wday = static_cast(extract_weekday(os, fds)); - if (os.fail()) return os; -#if !ONLY_C_LOCALE - const CharT f[] = {'%', *fmt}; - facet.put(os, os, os.fill(), &tm, begin(f), end(f)); -#else // ONLY_C_LOCALE - os << weekday_names().first[tm.tm_wday + 7 * (*fmt == 'a')]; -#endif // ONLY_C_LOCALE - } else { - os << CharT{'%'} << modified << *fmt; - modified = CharT{}; - } - command = nullptr; - } else { - os << *fmt; - } - break; - case 'b': - case 'B': - case 'h': - if (command) { - if (modified == CharT{}) { - tm.tm_mon = static_cast(extract_month(os, fds)) - 1; -#if !ONLY_C_LOCALE - const CharT f[] = {'%', *fmt}; - facet.put(os, os, os.fill(), &tm, begin(f), end(f)); -#else // ONLY_C_LOCALE - os << month_names().first[tm.tm_mon + 12 * (*fmt != 'B')]; -#endif // ONLY_C_LOCALE - } else { - os << CharT{'%'} << modified << *fmt; - modified = CharT{}; - } - command = nullptr; - } else { - os << *fmt; - } - break; - case 'c': - case 'x': - if (command) { - if (modified == CharT{'O'}) { - os << CharT{'%'} << modified << *fmt; - } else { - if (!fds.ymd.ok()) os.setstate(std::ios::failbit); - if (*fmt == 'c' && !fds.has_tod) os.setstate(std::ios::failbit); -#if !ONLY_C_LOCALE - tm = std::tm{}; - auto const& ymd = fds.ymd; - auto ld = local_days(ymd); - if (*fmt == 'c') { - tm.tm_sec = static_cast(fds.tod.seconds().count()); - tm.tm_min = static_cast(fds.tod.minutes().count()); - tm.tm_hour = static_cast(fds.tod.hours().count()); - } - tm.tm_mday = static_cast(static_cast(ymd.day())); - tm.tm_mon = static_cast(extract_month(os, fds) - 1); - tm.tm_year = static_cast(ymd.year()) - 1900; - tm.tm_wday = static_cast(extract_weekday(os, fds)); - if (os.fail()) return os; - tm.tm_yday = static_cast((ld - local_days(ymd.year() / 1 / 1)).count()); - CharT f[3] = {'%'}; - auto fe = begin(f) + 1; - if (modified == CharT{'E'}) *fe++ = modified; - *fe++ = *fmt; - facet.put(os, os, os.fill(), &tm, begin(f), fe); -#else // ONLY_C_LOCALE - if (*fmt == 'c') { - auto wd = static_cast(extract_weekday(os, fds)); - os << weekday_names().first[static_cast(wd) + 7] << ' '; - os << month_names().first[extract_month(os, fds) - 1 + 12] << ' '; - auto d = 
static_cast(static_cast(fds.ymd.day())); - if (d < 10) { - os << ' '; - } - os << d << ' ' << make_time(duration_cast(fds.tod.to_duration())) - << ' ' << fds.ymd.year(); - } else { // *fmt == 'x' - auto const& ymd = fds.ymd; - save_ostream _(os); - os.fill('0'); - os.flags(std::ios::dec | std::ios::right); - os.width(2); - os << static_cast(ymd.month()) << CharT{'/'}; - os.width(2); - os << static_cast(ymd.day()) << CharT{'/'}; - os.width(2); - os << static_cast(ymd.year()) % 100; - } -#endif // ONLY_C_LOCALE - } - command = nullptr; - modified = CharT{}; - } else { - os << *fmt; - } - break; - case 'C': - if (command) { - if (modified == CharT{'O'}) { - os << CharT{'%'} << modified << *fmt; - } else { - if (!fds.ymd.year().ok()) os.setstate(std::ios::failbit); - auto y = static_cast(fds.ymd.year()); -#if !ONLY_C_LOCALE - if (modified == CharT{}) -#endif - { - save_ostream _(os); - os.fill('0'); - os.flags(std::ios::dec | std::ios::right); - if (y >= 0) { - os.width(2); - os << y / 100; - } else { - os << CharT{'-'}; - os.width(2); - os << -(y - 99) / 100; - } - } -#if !ONLY_C_LOCALE - else if (modified == CharT{'E'}) { - tm.tm_year = y - 1900; - CharT f[3] = {'%', 'E', 'C'}; - facet.put(os, os, os.fill(), &tm, begin(f), end(f)); - } -#endif - } - command = nullptr; - modified = CharT{}; - } else { - os << *fmt; - } - break; - case 'd': - case 'e': - if (command) { - if (modified == CharT{'E'}) { - os << CharT{'%'} << modified << *fmt; - } else { - if (!fds.ymd.day().ok()) os.setstate(std::ios::failbit); - auto d = static_cast(static_cast(fds.ymd.day())); -#if !ONLY_C_LOCALE - if (modified == CharT{}) -#endif - { - save_ostream _(os); - if (*fmt == CharT{'d'}) { - os.fill('0'); - } else { - os.fill(' '); - } - os.flags(std::ios::dec | std::ios::right); - os.width(2); - os << d; - } -#if !ONLY_C_LOCALE - else if (modified == CharT{'O'}) { - tm.tm_mday = d; - CharT f[3] = {'%', 'O', *fmt}; - facet.put(os, os, os.fill(), &tm, begin(f), end(f)); - } -#endif - } - command = nullptr; - modified = CharT{}; - } else { - os << *fmt; - } - break; - case 'D': - if (command) { - if (modified == CharT{}) { - if (!fds.ymd.ok()) { - os.setstate(std::ios::failbit); - } - auto const& ymd = fds.ymd; - save_ostream _(os); - os.fill('0'); - os.flags(std::ios::dec | std::ios::right); - os.width(2); - os << static_cast(ymd.month()) << CharT{'/'}; - os.width(2); - os << static_cast(ymd.day()) << CharT{'/'}; - os.width(2); - os << static_cast(ymd.year()) % 100; - } else { - os << CharT{'%'} << modified << *fmt; - modified = CharT{}; - } - command = nullptr; - } else { - os << *fmt; - } - break; - case 'F': - if (command) { - if (modified == CharT{}) { - if (!fds.ymd.ok()) { - os.setstate(std::ios::failbit); - } - auto const& ymd = fds.ymd; - save_ostream _(os); - os.fill('0'); - os.flags(std::ios::dec | std::ios::right); - os.width(4); - os << static_cast(ymd.year()) << CharT{'-'}; - os.width(2); - os << static_cast(ymd.month()) << CharT{'-'}; - os.width(2); - os << static_cast(ymd.day()); - } else { - os << CharT{'%'} << modified << *fmt; - modified = CharT{}; - } - command = nullptr; - } else { - os << *fmt; - } - break; - case 'g': - case 'G': - if (command) { - if (modified == CharT{}) { - if (!fds.ymd.ok()) os.setstate(std::ios::failbit); - auto ld = local_days(fds.ymd); - auto y = year_month_day{ld + days{3}}.year(); - auto start = local_days((y - years{1}) / December / Thursday[last]) + - (Monday - Thursday); - if (ld < start) { - --y; - } - if (*fmt == CharT{'G'}) { - os << y; - } else { - save_ostream 
_(os); - os.fill('0'); - os.flags(std::ios::dec | std::ios::right); - os.width(2); - os << std::abs(static_cast(y)) % 100; - } - } else { - os << CharT{'%'} << modified << *fmt; - modified = CharT{}; - } - command = nullptr; - } else { - os << *fmt; - } - break; - case 'H': - case 'I': - if (command) { - if (modified == CharT{'E'}) { - os << CharT{'%'} << modified << *fmt; - } else { - if (!fds.has_tod) { - os.setstate(std::ios::failbit); - } - if (insert_negative) { - os << '-'; - insert_negative = false; - } - auto hms = fds.tod; -#if !ONLY_C_LOCALE - if (modified == CharT{}) -#endif - { - if (*fmt == CharT{'I'}) hms.make12(); - if (hms.hours() < hours{10}) os << CharT{'0'}; - os << hms.hours().count(); - } -#if !ONLY_C_LOCALE - else if (modified == CharT{'O'}) { - const CharT f[] = {'%', modified, *fmt}; - tm.tm_hour = static_cast(hms.hours().count()); - facet.put(os, os, os.fill(), &tm, begin(f), end(f)); - } -#endif - } - modified = CharT{}; - command = nullptr; - } else { - os << *fmt; - } - break; - case 'j': - if (command) { - if (modified == CharT{}) { - if (!fds.ymd.ok()) { - os.setstate(std::ios::failbit); - } - auto ld = local_days(fds.ymd); - auto y = fds.ymd.year(); - auto doy = ld - local_days(y / January / 1) + days{1}; - save_ostream _(os); - os.fill('0'); - os.flags(std::ios::dec | std::ios::right); - os.width(3); - os << doy.count(); - } else { - os << CharT{'%'} << modified << *fmt; - modified = CharT{}; - } - command = nullptr; - } else { - os << *fmt; - } - break; - case 'm': - if (command) { - if (modified == CharT{'E'}) { - os << CharT{'%'} << modified << *fmt; - } else { - if (!fds.ymd.month().ok()) os.setstate(std::ios::failbit); - auto m = static_cast(fds.ymd.month()); -#if !ONLY_C_LOCALE - if (modified == CharT{}) -#endif - { - if (m < 10) os << CharT{'0'}; - os << m; - } -#if !ONLY_C_LOCALE - else if (modified == CharT{'O'}) { - const CharT f[] = {'%', modified, *fmt}; - tm.tm_mon = static_cast(m - 1); - facet.put(os, os, os.fill(), &tm, begin(f), end(f)); - } -#endif - } - modified = CharT{}; - command = nullptr; - } else { - os << *fmt; - } - break; - case 'M': - if (command) { - if (modified == CharT{'E'}) { - os << CharT{'%'} << modified << *fmt; - } else { - if (!fds.has_tod) os.setstate(std::ios::failbit); - if (insert_negative) { - os << '-'; - insert_negative = false; - } -#if !ONLY_C_LOCALE - if (modified == CharT{}) -#endif - { - if (fds.tod.minutes() < minutes{10}) os << CharT{'0'}; - os << fds.tod.minutes().count(); - } -#if !ONLY_C_LOCALE - else if (modified == CharT{'O'}) { - const CharT f[] = {'%', modified, *fmt}; - tm.tm_min = static_cast(fds.tod.minutes().count()); - facet.put(os, os, os.fill(), &tm, begin(f), end(f)); - } -#endif - } - modified = CharT{}; - command = nullptr; - } else { - os << *fmt; - } - break; - case 'n': - if (command) { - if (modified == CharT{}) - os << CharT{'\n'}; - else { - os << CharT{'%'} << modified << *fmt; - modified = CharT{}; - } - command = nullptr; - } else { - os << *fmt; - } - break; - case 'p': - if (command) { - if (modified == CharT{}) { - if (!fds.has_tod) os.setstate(std::ios::failbit); -#if !ONLY_C_LOCALE - const CharT f[] = {'%', *fmt}; - tm.tm_hour = static_cast(fds.tod.hours().count()); - facet.put(os, os, os.fill(), &tm, begin(f), end(f)); -#else - if (fds.tod.hours() < hours{12}) { - os << ampm_names().first[0]; - } else { - os << ampm_names().first[1]; - } -#endif - } else { - os << CharT{'%'} << modified << *fmt; - } - modified = CharT{}; - command = nullptr; - } else { - os << *fmt; - } - 
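// [Editor's note] Each case in this switch implements one strftime-style flag on
// top of the command/modified state machine. A usage sketch tying it to the
// public format() API defined later in this header (editor's illustration;
// output shown for the classic "C" locale):
//
//   auto tp = date::sys_days{date::year{2018} / 12 / 25} + std::chrono::hours{15};
//   // %F -> "2018-12-25", %I -> 12-hour clock "03", %p -> "PM"
//   std::string s = date::format("%F %I%p", tp);  // "2018-12-25 03PM"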
break; - case 'r': - if (command) { - if (modified == CharT{}) { - if (!fds.has_tod) os.setstate(std::ios::failbit); -#if !ONLY_C_LOCALE - const CharT f[] = {'%', *fmt}; - tm.tm_hour = static_cast(fds.tod.hours().count()); - tm.tm_min = static_cast(fds.tod.minutes().count()); - tm.tm_sec = static_cast(fds.tod.seconds().count()); - facet.put(os, os, os.fill(), &tm, begin(f), end(f)); -#else - time_of_day tod(duration_cast(fds.tod.to_duration())); - tod.make12(); - save_ostream _(os); - os.fill('0'); - os.width(2); - os << tod.hours().count() << CharT{':'}; - os.width(2); - os << tod.minutes().count() << CharT{':'}; - os.width(2); - os << tod.seconds().count() << CharT{' '}; - tod.make24(); - if (tod.hours() < hours{12}) { - os << ampm_names().first[0]; - } else { - os << ampm_names().first[1]; - } -#endif - } else { - os << CharT{'%'} << modified << *fmt; - } - modified = CharT{}; - command = nullptr; - } else { - os << *fmt; - } - break; - case 'R': - if (command) { - if (modified == CharT{}) { - if (!fds.has_tod) { - os.setstate(std::ios::failbit); - } - if (fds.tod.hours() < hours{10}) { - os << CharT{'0'}; - } - os << fds.tod.hours().count() << CharT{':'}; - if (fds.tod.minutes() < minutes{10}) { - os << CharT{'0'}; - } - os << fds.tod.minutes().count(); - } else { - os << CharT{'%'} << modified << *fmt; - modified = CharT{}; - } - command = nullptr; - } else { - os << *fmt; - } - break; - case 'S': - if (command) { - if (modified == CharT{'E'}) { - os << CharT{'%'} << modified << *fmt; - } else { - if (!fds.has_tod) os.setstate(std::ios::failbit); - if (insert_negative) { - os << '-'; - insert_negative = false; - } -#if !ONLY_C_LOCALE - if (modified == CharT{}) -#endif - { - os << fds.tod.s_; - } -#if !ONLY_C_LOCALE - else if (modified == CharT{'O'}) { - const CharT f[] = {'%', modified, *fmt}; - tm.tm_sec = static_cast(fds.tod.s_.seconds().count()); - facet.put(os, os, os.fill(), &tm, begin(f), end(f)); - } -#endif - } - modified = CharT{}; - command = nullptr; - } else { - os << *fmt; - } - break; - case 't': - if (command) { - if (modified == CharT{}) { - os << CharT{'\t'}; - } else { - os << CharT{'%'} << modified << *fmt; - modified = CharT{}; - } - command = nullptr; - } else { - os << *fmt; - } - break; - case 'T': - if (command) { - if (modified == CharT{}) { - if (!fds.has_tod) os.setstate(std::ios::failbit); - os << fds.tod; - } else { - os << CharT{'%'} << modified << *fmt; - modified = CharT{}; - } - command = nullptr; - } else { - os << *fmt; - } - break; - case 'u': - if (command) { - if (modified == CharT{'E'}) { - os << CharT{'%'} << modified << *fmt; - } else { - auto wd = extract_weekday(os, fds); -#if !ONLY_C_LOCALE - if (modified == CharT{}) -#endif - { - os << (wd != 0 ? 
wd : 7u); - } -#if !ONLY_C_LOCALE - else if (modified == CharT{'O'}) { - const CharT f[] = {'%', modified, *fmt}; - tm.tm_wday = static_cast(wd); - facet.put(os, os, os.fill(), &tm, begin(f), end(f)); - } -#endif - } - modified = CharT{}; - command = nullptr; - } else { - os << *fmt; - } - break; - case 'U': - if (command) { - if (modified == CharT{'E'}) { - os << CharT{'%'} << modified << *fmt; - } else { - auto const& ymd = fds.ymd; - if (!ymd.ok()) os.setstate(std::ios::failbit); - auto ld = local_days(ymd); -#if !ONLY_C_LOCALE - if (modified == CharT{}) -#endif - { - auto st = local_days(Sunday[1] / January / ymd.year()); - if (ld < st) - os << CharT{'0'} << CharT{'0'}; - else { - auto wn = duration_cast(ld - st).count() + 1; - if (wn < 10) os << CharT{'0'}; - os << wn; - } - } -#if !ONLY_C_LOCALE - else if (modified == CharT{'O'}) { - const CharT f[] = {'%', modified, *fmt}; - tm.tm_year = static_cast(ymd.year()) - 1900; - tm.tm_wday = static_cast(extract_weekday(os, fds)); - if (os.fail()) return os; - tm.tm_yday = - static_cast((ld - local_days(ymd.year() / 1 / 1)).count()); - facet.put(os, os, os.fill(), &tm, begin(f), end(f)); - } -#endif - } - modified = CharT{}; - command = nullptr; - } else { - os << *fmt; - } - break; - case 'V': - if (command) { - if (modified == CharT{'E'}) { - os << CharT{'%'} << modified << *fmt; - } else { - if (!fds.ymd.ok()) os.setstate(std::ios::failbit); - auto ld = local_days(fds.ymd); -#if !ONLY_C_LOCALE - if (modified == CharT{}) -#endif - { - auto y = year_month_day{ld + days{3}}.year(); - auto st = - local_days((y - years{1}) / 12 / Thursday[last]) + (Monday - Thursday); - if (ld < st) { - --y; - st = local_days((y - years{1}) / 12 / Thursday[last]) + - (Monday - Thursday); - } - auto wn = duration_cast(ld - st).count() + 1; - if (wn < 10) os << CharT{'0'}; - os << wn; - } -#if !ONLY_C_LOCALE - else if (modified == CharT{'O'}) { - const CharT f[] = {'%', modified, *fmt}; - auto const& ymd = fds.ymd; - tm.tm_year = static_cast(ymd.year()) - 1900; - tm.tm_wday = static_cast(extract_weekday(os, fds)); - if (os.fail()) return os; - tm.tm_yday = - static_cast((ld - local_days(ymd.year() / 1 / 1)).count()); - facet.put(os, os, os.fill(), &tm, begin(f), end(f)); - } -#endif - } - modified = CharT{}; - command = nullptr; - } else { - os << *fmt; - } - break; - case 'w': - if (command) { - auto wd = extract_weekday(os, fds); - if (os.fail()) return os; -#if !ONLY_C_LOCALE - if (modified == CharT{}) -#else - if (modified != CharT{'E'}) -#endif - { - os << wd; - } -#if !ONLY_C_LOCALE - else if (modified == CharT{'O'}) { - const CharT f[] = {'%', modified, *fmt}; - tm.tm_wday = static_cast(wd); - facet.put(os, os, os.fill(), &tm, begin(f), end(f)); - } -#endif - else { - os << CharT{'%'} << modified << *fmt; - } - modified = CharT{}; - command = nullptr; - } else { - os << *fmt; - } - break; - case 'W': - if (command) { - if (modified == CharT{'E'}) { - os << CharT{'%'} << modified << *fmt; - } else { - auto const& ymd = fds.ymd; - if (!ymd.ok()) os.setstate(std::ios::failbit); - auto ld = local_days(ymd); -#if !ONLY_C_LOCALE - if (modified == CharT{}) -#endif - { - auto st = local_days(Monday[1] / January / ymd.year()); - if (ld < st) - os << CharT{'0'} << CharT{'0'}; - else { - auto wn = duration_cast(ld - st).count() + 1; - if (wn < 10) os << CharT{'0'}; - os << wn; - } - } -#if !ONLY_C_LOCALE - else if (modified == CharT{'O'}) { - const CharT f[] = {'%', modified, *fmt}; - tm.tm_year = static_cast(ymd.year()) - 1900; - tm.tm_wday = 
static_cast(extract_weekday(os, fds)); - if (os.fail()) return os; - tm.tm_yday = - static_cast((ld - local_days(ymd.year() / 1 / 1)).count()); - facet.put(os, os, os.fill(), &tm, begin(f), end(f)); - } -#endif - } - modified = CharT{}; - command = nullptr; - } else - os << *fmt; - break; - case 'X': - if (command) { - if (modified == CharT{'O'}) - os << CharT{'%'} << modified << *fmt; - else { - if (!fds.has_tod) os.setstate(std::ios::failbit); -#if !ONLY_C_LOCALE - tm = std::tm{}; - tm.tm_sec = static_cast(fds.tod.seconds().count()); - tm.tm_min = static_cast(fds.tod.minutes().count()); - tm.tm_hour = static_cast(fds.tod.hours().count()); - CharT f[3] = {'%'}; - auto fe = begin(f) + 1; - if (modified == CharT{'E'}) *fe++ = modified; - *fe++ = *fmt; - facet.put(os, os, os.fill(), &tm, begin(f), fe); -#else - os << fds.tod; -#endif - } - command = nullptr; - modified = CharT{}; - } else - os << *fmt; - break; - case 'y': - if (command) { - if (!fds.ymd.year().ok()) os.setstate(std::ios::failbit); - auto y = static_cast(fds.ymd.year()); -#if !ONLY_C_LOCALE - if (modified == CharT{}) { -#endif - y = std::abs(y) % 100; - if (y < 10) os << CharT{'0'}; - os << y; -#if !ONLY_C_LOCALE - } else { - const CharT f[] = {'%', modified, *fmt}; - tm.tm_year = y - 1900; - facet.put(os, os, os.fill(), &tm, begin(f), end(f)); - } -#endif - modified = CharT{}; - command = nullptr; - } else - os << *fmt; - break; - case 'Y': - if (command) { - if (modified == CharT{'O'}) - os << CharT{'%'} << modified << *fmt; - else { - if (!fds.ymd.year().ok()) os.setstate(std::ios::failbit); - auto y = fds.ymd.year(); -#if !ONLY_C_LOCALE - if (modified == CharT{}) -#endif - { - os << y; - } -#if !ONLY_C_LOCALE - else if (modified == CharT{'E'}) { - const CharT f[] = {'%', modified, *fmt}; - tm.tm_year = static_cast(y) - 1900; - facet.put(os, os, os.fill(), &tm, begin(f), end(f)); - } -#endif - } - modified = CharT{}; - command = nullptr; - } else - os << *fmt; - break; - case 'z': - if (command) { - if (offset_sec == nullptr) { - // Can not format %z with unknown offset - os.setstate(ios::failbit); - return os; - } - auto m = duration_cast(*offset_sec); - auto neg = m < minutes{0}; - m = date::abs(m); - auto h = duration_cast(m); - m -= h; - if (neg) - os << CharT{'-'}; - else - os << CharT{'+'}; - if (h < hours{10}) os << CharT{'0'}; - os << h.count(); - if (modified != CharT{}) os << CharT{':'}; - if (m < minutes{10}) os << CharT{'0'}; - os << m.count(); - command = nullptr; - modified = CharT{}; - } else - os << *fmt; - break; - case 'Z': - if (command) { - if (modified == CharT{}) { - if (abbrev == nullptr) { - // Can not format %Z with unknown time_zone - os.setstate(ios::failbit); - return os; - } - for (auto c : *abbrev) os << CharT(c); - } else { - os << CharT{'%'} << modified << *fmt; - modified = CharT{}; - } - command = nullptr; - } else - os << *fmt; - break; - case 'E': - case 'O': - if (command) { - if (modified == CharT{}) { - modified = *fmt; - } else { - os << CharT{'%'} << modified << *fmt; - command = nullptr; - modified = CharT{}; - } - } else - os << *fmt; - break; - case '%': - if (command) { - if (modified == CharT{}) { - os << CharT{'%'}; - command = nullptr; - } else { - os << CharT{'%'} << modified << CharT{'%'}; - command = nullptr; - modified = CharT{}; - } - } else - command = fmt; - break; - default: - if (command) { - os << CharT{'%'}; - command = nullptr; - } - if (modified != CharT{}) { - os << modified; - modified = CharT{}; - } - os << *fmt; - break; - } - } - if (command) os << 
CharT{'%'}; - if (modified != CharT{}) os << modified; - return os; -} - -template -inline std::basic_ostream& to_stream(std::basic_ostream& os, - const CharT* fmt, const year& y) { - using CT = std::chrono::seconds; - fields fds{y / 0 / 0}; - return to_stream(os, fmt, fds); -} - -template -inline std::basic_ostream& to_stream(std::basic_ostream& os, - const CharT* fmt, const month& m) { - using CT = std::chrono::seconds; - fields fds{m / 0 / nanyear}; - return to_stream(os, fmt, fds); -} - -template -inline std::basic_ostream& to_stream(std::basic_ostream& os, - const CharT* fmt, const day& d) { - using CT = std::chrono::seconds; - fields fds{d / 0 / nanyear}; - return to_stream(os, fmt, fds); -} - -template -inline std::basic_ostream& to_stream(std::basic_ostream& os, - const CharT* fmt, const weekday& wd) { - using CT = std::chrono::seconds; - fields fds{wd}; - return to_stream(os, fmt, fds); -} - -template -inline std::basic_ostream& to_stream(std::basic_ostream& os, - const CharT* fmt, - const year_month& ym) { - using CT = std::chrono::seconds; - fields fds{ym / 0}; - return to_stream(os, fmt, fds); -} - -template -inline std::basic_ostream& to_stream(std::basic_ostream& os, - const CharT* fmt, - const month_day& md) { - using CT = std::chrono::seconds; - fields fds{md / nanyear}; - return to_stream(os, fmt, fds); -} - -template -inline std::basic_ostream& to_stream(std::basic_ostream& os, - const CharT* fmt, - const year_month_day& ymd) { - using CT = std::chrono::seconds; - fields fds{ymd}; - return to_stream(os, fmt, fds); -} - -template -inline std::basic_ostream& to_stream( - std::basic_ostream& os, const CharT* fmt, - const std::chrono::duration& d) { - using Duration = std::chrono::duration; - using CT = typename std::common_type::type; - fields fds{time_of_day{d}}; - return to_stream(os, fmt, fds); -} - -template -std::basic_ostream& to_stream( - std::basic_ostream& os, const CharT* fmt, - const local_time& tp, const std::string* abbrev = nullptr, - const std::chrono::seconds* offset_sec = nullptr) { - using CT = typename std::common_type::type; - auto ld = floor(tp); - fields fds{year_month_day{ld}, time_of_day{tp - local_seconds{ld}}}; - return to_stream(os, fmt, fds, abbrev, offset_sec); -} - -template -std::basic_ostream& to_stream(std::basic_ostream& os, - const CharT* fmt, - const sys_time& tp) { - using namespace std::chrono; - using CT = typename std::common_type::type; - const std::string abbrev("UTC"); - CONSTDATA seconds offset{0}; - auto sd = floor(tp); - fields fds{year_month_day{sd}, time_of_day{tp - sys_seconds{sd}}}; - return to_stream(os, fmt, fds, &abbrev, &offset); -} - -// format - -template -auto format(const std::locale& loc, const CharT* fmt, const Streamable& tp) - -> decltype(to_stream(std::declval&>(), fmt, tp), - std::basic_string{}) { - std::basic_ostringstream os; - os.exceptions(std::ios::failbit | std::ios::badbit); - os.imbue(loc); - to_stream(os, fmt, tp); - return os.str(); -} - -template -auto format(const CharT* fmt, const Streamable& tp) - -> decltype(to_stream(std::declval&>(), fmt, tp), - std::basic_string{}) { - std::basic_ostringstream os; - os.exceptions(std::ios::failbit | std::ios::badbit); - to_stream(os, fmt, tp); - return os.str(); -} - -template -auto format(const std::locale& loc, const std::basic_string& fmt, - const Streamable& tp) - -> decltype(to_stream(std::declval&>(), fmt.c_str(), - tp), - std::basic_string{}) { - std::basic_ostringstream os; - os.exceptions(std::ios::failbit | std::ios::badbit); - os.imbue(loc); - 
to_stream(os, fmt.c_str(), tp); - return os.str(); -} - -template -auto format(const std::basic_string& fmt, const Streamable& tp) - -> decltype(to_stream(std::declval&>(), fmt.c_str(), - tp), - std::basic_string{}) { - std::basic_ostringstream os; - os.exceptions(std::ios::failbit | std::ios::badbit); - to_stream(os, fmt.c_str(), tp); - return os.str(); -} - -// parse - -namespace detail { - -template -bool read_char(std::basic_istream& is, CharT fmt, std::ios::iostate& err) { - auto ic = is.get(); - if (Traits::eq_int_type(ic, Traits::eof()) || - !Traits::eq(Traits::to_char_type(ic), fmt)) { - err |= std::ios::failbit; - is.setstate(std::ios::failbit); - return false; - } - return true; -} - -template -unsigned read_unsigned(std::basic_istream& is, unsigned m = 1, - unsigned M = 10) { - unsigned x = 0; - unsigned count = 0; - while (true) { - auto ic = is.peek(); - if (Traits::eq_int_type(ic, Traits::eof())) break; - auto c = static_cast(Traits::to_char_type(ic)); - if (!('0' <= c && c <= '9')) break; - (void)is.get(); - ++count; - x = 10 * x + static_cast(c - '0'); - if (count == M) break; - } - if (count < m) is.setstate(std::ios::failbit); - return x; -} - -template -int read_signed(std::basic_istream& is, unsigned m = 1, unsigned M = 10) { - auto ic = is.peek(); - if (!Traits::eq_int_type(ic, Traits::eof())) { - auto c = static_cast(Traits::to_char_type(ic)); - if (('0' <= c && c <= '9') || c == '-' || c == '+') { - if (c == '-' || c == '+') (void)is.get(); - auto x = static_cast(read_unsigned(is, std::max(m, 1u), M)); - if (!is.fail()) { - if (c == '-') x = -x; - return x; - } - } - } - if (m > 0) is.setstate(std::ios::failbit); - return 0; -} - -template -long double read_long_double(std::basic_istream& is, unsigned m = 1, - unsigned M = 10) { - using namespace std; - unsigned count = 0; - auto decimal_point = - Traits::to_int_type(use_facet>(is.getloc()).decimal_point()); - std::string buf; - while (true) { - auto ic = is.peek(); - if (Traits::eq_int_type(ic, Traits::eof())) break; - if (Traits::eq_int_type(ic, decimal_point)) { - buf += '.'; - decimal_point = Traits::eof(); - is.get(); - } else { - auto c = static_cast(Traits::to_char_type(ic)); - if (!('0' <= c && c <= '9')) break; - buf += c; - (void)is.get(); - } - if (++count == M) break; - } - if (count < m) { - is.setstate(std::ios::failbit); - return 0; - } - return std::stold(buf); -} - -struct rs { - int& i; - unsigned m; - unsigned M; -}; - -struct ru { - int& i; - unsigned m; - unsigned M; -}; - -struct rld { - long double& i; - unsigned m; - unsigned M; -}; - -template -void read(std::basic_istream&) {} - -template -void read(std::basic_istream& is, CharT a0, Args&&... args); - -template -void read(std::basic_istream& is, rs a0, Args&&... args); - -template -void read(std::basic_istream& is, ru a0, Args&&... args); - -template -void read(std::basic_istream& is, int a0, Args&&... args); - -template -void read(std::basic_istream& is, rld a0, Args&&... args); - -template -void read(std::basic_istream& is, CharT a0, Args&&... args) { - // No-op if a0 == CharT{} - if (a0 != CharT{}) { - auto ic = is.peek(); - if (Traits::eq_int_type(ic, Traits::eof())) { - is.setstate(std::ios::failbit | std::ios::eofbit); - return; - } - if (!Traits::eq(Traits::to_char_type(ic), a0)) { - is.setstate(std::ios::failbit); - return; - } - (void)is.get(); - } - read(is, std::forward(args)...); -} - -template -void read(std::basic_istream& is, rs a0, Args&&... 
args) { - auto x = read_signed(is, a0.m, a0.M); - if (is.fail()) return; - a0.i = x; - read(is, std::forward(args)...); -} - -template -void read(std::basic_istream& is, ru a0, Args&&... args) { - auto x = read_unsigned(is, a0.m, a0.M); - if (is.fail()) return; - a0.i = static_cast(x); - read(is, std::forward(args)...); -} - -template -void read(std::basic_istream& is, int a0, Args&&... args) { - if (a0 != -1) { - auto u = static_cast(a0); - CharT buf[std::numeric_limits::digits10 + 2] = {}; - auto e = buf; - do { - *e++ = CharT(u % 10) + CharT{'0'}; - u /= 10; - } while (u > 0); - std::reverse(buf, e); - for (auto p = buf; p != e && is.rdstate() == std::ios::goodbit; ++p) read(is, *p); - } - if (is.rdstate() == std::ios::goodbit) read(is, std::forward(args)...); -} - -template -void read(std::basic_istream& is, rld a0, Args&&... args) { - auto x = read_long_double(is, a0.m, a0.M); - if (is.fail()) return; - a0.i = x; - read(is, std::forward(args)...); -} - -template -inline void checked_set(T& value, T from, T not_a_value, - std::basic_ios& is) { - if (!is.fail()) { - if (value == not_a_value) - value = std::move(from); - else if (value != from) - is.setstate(std::ios::failbit); - } -} - -} // namespace detail - -template > -std::basic_istream& from_stream( - std::basic_istream& is, const CharT* fmt, fields& fds, - std::basic_string* abbrev, std::chrono::minutes* offset) { - using namespace std; - using namespace std::chrono; - typename basic_istream::sentry ok{is, true}; - if (ok) { - date::detail::save_istream ss(is); - is.fill(' '); - is.flags(std::ios::skipws | std::ios::dec); - is.width(0); -#if !ONLY_C_LOCALE - auto& f = use_facet>(is.getloc()); - std::tm tm{}; -#endif - const CharT* command = nullptr; - auto modified = CharT{}; - auto width = -1; - - CONSTDATA int not_a_year = numeric_limits::min(); - CONSTDATA int not_a_2digit_year = 100; - CONSTDATA int not_a_century = not_a_year / 100; - CONSTDATA int not_a_month = 0; - CONSTDATA int not_a_day = 0; - CONSTDATA int not_a_hour = numeric_limits::min(); - CONSTDATA int not_a_hour_12_value = 0; - CONSTDATA int not_a_minute = not_a_hour; - CONSTDATA Duration not_a_second = Duration::min(); - CONSTDATA int not_a_doy = 0; - CONSTDATA int not_a_weekday = 8; - CONSTDATA int not_a_week_num = 100; - CONSTDATA int not_a_ampm = -1; - CONSTDATA minutes not_a_offset = minutes::min(); - - int Y = not_a_year; // c, F, Y * - int y = not_a_2digit_year; // D, x, y * - int g = not_a_2digit_year; // g * - int G = not_a_year; // G * - int C = not_a_century; // C * - int m = not_a_month; // b, B, h, m, c, D, F, x * - int d = not_a_day; // c, d, D, e, F, x * - int j = not_a_doy; // j * - int wd = not_a_weekday; // a, A, u, w * - int H = not_a_hour; // c, H, R, T, X * - int I = not_a_hour_12_value; // I, r * - int p = not_a_ampm; // p, r * - int M = not_a_minute; // c, M, r, R, T, X * - Duration s = not_a_second; // c, r, S, T, X * - int U = not_a_week_num; // U * - int V = not_a_week_num; // V * - int W = not_a_week_num; // W * - std::basic_string temp_abbrev; // Z * - minutes temp_offset = not_a_offset; // z * - - using detail::checked_set; - using detail::read; - using detail::rld; - using detail::rs; - using detail::ru; - for (; *fmt && is.rdstate() == std::ios::goodbit; ++fmt) { - switch (*fmt) { - case 'a': - case 'A': - case 'u': - case 'w': // wd: a, A, u, w - if (command) { - int trial_wd = not_a_weekday; - if (*fmt == 'a' || *fmt == 'A') { - if (modified == CharT{}) { -#if !ONLY_C_LOCALE - ios_base::iostate err = ios_base::goodbit; - f.get(is, 
nullptr, is, err, &tm, command, fmt + 1); - is.setstate(err); - if (!is.fail()) trial_wd = tm.tm_wday; -#else - auto nm = detail::weekday_names(); - auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first; - if (!is.fail()) trial_wd = i % 7; -#endif - } else - read(is, CharT{'%'}, width, modified, *fmt); - } else // *fmt == 'u' || *fmt == 'w' - { -#if !ONLY_C_LOCALE - if (modified == CharT{}) -#else - if (modified != CharT{'E'}) -#endif - { - read(is, - ru{trial_wd, 1, width == -1 ? 1u : static_cast(width)}); - if (!is.fail()) { - if (*fmt == 'u') { - if (!(1 <= trial_wd && trial_wd <= 7)) { - trial_wd = not_a_weekday; - is.setstate(ios_base::failbit); - } else if (trial_wd == 7) - trial_wd = 0; - } else // *fmt == 'w' - { - if (!(0 <= trial_wd && trial_wd <= 6)) { - trial_wd = not_a_weekday; - is.setstate(ios_base::failbit); - } - } - } - } -#if !ONLY_C_LOCALE - else if (modified == CharT{'O'}) { - ios_base::iostate err = ios_base::goodbit; - f.get(is, nullptr, is, err, &tm, command, fmt + 1); - is.setstate(err); - if (!is.fail()) trial_wd = tm.tm_wday; - } -#endif - else - read(is, CharT{'%'}, width, modified, *fmt); - } - if (trial_wd != not_a_weekday) checked_set(wd, trial_wd, not_a_weekday, is); - } else // !command - read(is, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - break; - case 'b': - case 'B': - case 'h': - if (command) { - if (modified == CharT{}) { - int ttm = not_a_month; -#if !ONLY_C_LOCALE - ios_base::iostate err = ios_base::goodbit; - f.get(is, nullptr, is, err, &tm, command, fmt + 1); - if ((err & ios::failbit) == 0) ttm = tm.tm_mon + 1; - is.setstate(err); -#else - auto nm = detail::month_names(); - auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first; - if (!is.fail()) ttm = i % 12 + 1; -#endif - checked_set(m, ttm, not_a_month, is); - } else - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - break; - case 'c': - if (command) { - if (modified != CharT{'O'}) { -#if !ONLY_C_LOCALE - ios_base::iostate err = ios_base::goodbit; - f.get(is, nullptr, is, err, &tm, command, fmt + 1); - if ((err & ios::failbit) == 0) { - checked_set(Y, tm.tm_year + 1900, not_a_year, is); - checked_set(m, tm.tm_mon + 1, not_a_month, is); - checked_set(d, tm.tm_mday, not_a_day, is); - checked_set(H, tm.tm_hour, not_a_hour, is); - checked_set(M, tm.tm_min, not_a_minute, is); - checked_set(s, duration_cast(seconds{tm.tm_sec}), not_a_second, - is); - } - is.setstate(err); -#else - // "%a %b %e %T %Y" - auto nm = detail::weekday_names(); - auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first; - checked_set(wd, static_cast(i % 7), not_a_weekday, is); - ws(is); - nm = detail::month_names(); - i = detail::scan_keyword(is, nm.first, nm.second) - nm.first; - checked_set(m, static_cast(i % 12 + 1), not_a_month, is); - ws(is); - int td = not_a_day; - read(is, rs{td, 1, 2}); - checked_set(d, td, not_a_day, is); - ws(is); - using dfs = detail::decimal_format_seconds; - CONSTDATA auto w = Duration::period::den == 1 ? 
2 : 3 + dfs::width; - int tH; - int tM; - long double S; - read(is, ru{tH, 1, 2}, CharT{':'}, ru{tM, 1, 2}, CharT{':'}, rld{S, 1, w}); - checked_set(H, tH, not_a_hour, is); - checked_set(M, tM, not_a_minute, is); - checked_set(s, round(duration{S}), not_a_second, is); - ws(is); - int tY = not_a_year; - read(is, rs{tY, 1, 4u}); - checked_set(Y, tY, not_a_year, is); -#endif - } else - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - break; - case 'x': - if (command) { - if (modified != CharT{'O'}) { -#if !ONLY_C_LOCALE - ios_base::iostate err = ios_base::goodbit; - f.get(is, nullptr, is, err, &tm, command, fmt + 1); - if ((err & ios::failbit) == 0) { - checked_set(Y, tm.tm_year + 1900, not_a_year, is); - checked_set(m, tm.tm_mon + 1, not_a_month, is); - checked_set(d, tm.tm_mday, not_a_day, is); - } - is.setstate(err); -#else - // "%m/%d/%y" - int ty = not_a_2digit_year; - int tm = not_a_month; - int td = not_a_day; - read(is, ru{tm, 1, 2}, CharT{'/'}, ru{td, 1, 2}, CharT{'/'}, rs{ty, 1, 2}); - checked_set(y, ty, not_a_2digit_year, is); - checked_set(m, tm, not_a_month, is); - checked_set(d, td, not_a_day, is); -#endif - } else - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - break; - case 'X': - if (command) { - if (modified != CharT{'O'}) { -#if !ONLY_C_LOCALE - ios_base::iostate err = ios_base::goodbit; - f.get(is, nullptr, is, err, &tm, command, fmt + 1); - if ((err & ios::failbit) == 0) { - checked_set(H, tm.tm_hour, not_a_hour, is); - checked_set(M, tm.tm_min, not_a_minute, is); - checked_set(s, duration_cast(seconds{tm.tm_sec}), not_a_second, - is); - } - is.setstate(err); -#else - // "%T" - using dfs = detail::decimal_format_seconds; - CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width; - int tH = not_a_hour; - int tM = not_a_minute; - long double S; - read(is, ru{tH, 1, 2}, CharT{':'}, ru{tM, 1, 2}, CharT{':'}, rld{S, 1, w}); - checked_set(H, tH, not_a_hour, is); - checked_set(M, tM, not_a_minute, is); - checked_set(s, round(duration{S}), not_a_second, is); -#endif - } else - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - break; - case 'C': - if (command) { - int tC = not_a_century; -#if !ONLY_C_LOCALE - if (modified == CharT{}) { -#endif - read(is, rs{tC, 1, width == -1 ? 2u : static_cast(width)}); -#if !ONLY_C_LOCALE - } else { - ios_base::iostate err = ios_base::goodbit; - f.get(is, nullptr, is, err, &tm, command, fmt + 1); - if ((err & ios::failbit) == 0) { - auto tY = tm.tm_year + 1900; - tC = (tY >= 0 ? 
tY : tY - 99) / 100; - } - is.setstate(err); - } -#endif - checked_set(C, tC, not_a_century, is); - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - break; - case 'D': - if (command) { - if (modified == CharT{}) { - int tn = not_a_month; - int td = not_a_day; - int ty = not_a_2digit_year; - read(is, ru{tn, 1, 2}, CharT{'\0'}, CharT{'/'}, CharT{'\0'}, ru{td, 1, 2}, - CharT{'\0'}, CharT{'/'}, CharT{'\0'}, rs{ty, 1, 2}); - checked_set(y, ty, not_a_2digit_year, is); - checked_set(m, tn, not_a_month, is); - checked_set(d, td, not_a_day, is); - } else - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - break; - case 'F': - if (command) { - if (modified == CharT{}) { - int tY = not_a_year; - int tn = not_a_month; - int td = not_a_day; - read(is, rs{tY, 1, width == -1 ? 4u : static_cast(width)}, - CharT{'-'}, ru{tn, 1, 2}, CharT{'-'}, ru{td, 1, 2}); - checked_set(Y, tY, not_a_year, is); - checked_set(m, tn, not_a_month, is); - checked_set(d, td, not_a_day, is); - } else - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - break; - case 'd': - case 'e': - if (command) { -#if !ONLY_C_LOCALE - if (modified == CharT{}) -#else - if (modified != CharT{'E'}) -#endif - { - int td = not_a_day; - read(is, rs{td, 1, width == -1 ? 2u : static_cast(width)}); - checked_set(d, td, not_a_day, is); - } -#if !ONLY_C_LOCALE - else if (modified == CharT{'O'}) { - ios_base::iostate err = ios_base::goodbit; - f.get(is, nullptr, is, err, &tm, command, fmt + 1); - command = nullptr; - width = -1; - modified = CharT{}; - if ((err & ios::failbit) == 0) checked_set(d, tm.tm_mday, not_a_day, is); - is.setstate(err); - } -#endif - else - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - break; - case 'H': - if (command) { -#if !ONLY_C_LOCALE - if (modified == CharT{}) -#else - if (modified != CharT{'E'}) -#endif - { - int tH = not_a_hour; - read(is, ru{tH, 1, width == -1 ? 2u : static_cast(width)}); - checked_set(H, tH, not_a_hour, is); - } -#if !ONLY_C_LOCALE - else if (modified == CharT{'O'}) { - ios_base::iostate err = ios_base::goodbit; - f.get(is, nullptr, is, err, &tm, command, fmt + 1); - if ((err & ios::failbit) == 0) checked_set(H, tm.tm_hour, not_a_hour, is); - is.setstate(err); - } -#endif - else - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - break; - case 'I': - if (command) { - if (modified == CharT{}) { - int tI = not_a_hour_12_value; - // reads in an hour into I, but most be in [1, 12] - read(is, rs{tI, 1, width == -1 ? 2u : static_cast(width)}); - if (!(1 <= tI && tI <= 12)) is.setstate(ios::failbit); - checked_set(I, tI, not_a_hour_12_value, is); - } else - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - break; - case 'j': - if (command) { - if (modified == CharT{}) { - int tj = not_a_doy; - read(is, ru{tj, 1, width == -1 ? 
3u : static_cast(width)}); - checked_set(j, tj, not_a_doy, is); - } else - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - break; - case 'M': - if (command) { -#if !ONLY_C_LOCALE - if (modified == CharT{}) -#else - if (modified != CharT{'E'}) -#endif - { - int tM = not_a_minute; - read(is, ru{tM, 1, width == -1 ? 2u : static_cast(width)}); - checked_set(M, tM, not_a_minute, is); - } -#if !ONLY_C_LOCALE - else if (modified == CharT{'O'}) { - ios_base::iostate err = ios_base::goodbit; - f.get(is, nullptr, is, err, &tm, command, fmt + 1); - if ((err & ios::failbit) == 0) checked_set(M, tm.tm_min, not_a_minute, is); - is.setstate(err); - } -#endif - else - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - break; - case 'm': - if (command) { -#if !ONLY_C_LOCALE - if (modified == CharT{}) -#else - if (modified != CharT{'E'}) -#endif - { - int tn = not_a_month; - read(is, rs{tn, 1, width == -1 ? 2u : static_cast(width)}); - checked_set(m, tn, not_a_month, is); - } -#if !ONLY_C_LOCALE - else if (modified == CharT{'O'}) { - ios_base::iostate err = ios_base::goodbit; - f.get(is, nullptr, is, err, &tm, command, fmt + 1); - if ((err & ios::failbit) == 0) - checked_set(m, tm.tm_mon + 1, not_a_month, is); - is.setstate(err); - } -#endif - else - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - break; - case 'n': - case 't': - if (command) { - if (modified == CharT{}) { - // %n matches a single white space character - // %t matches 0 or 1 white space characters - auto ic = is.peek(); - if (Traits::eq_int_type(ic, Traits::eof())) { - ios_base::iostate err = ios_base::eofbit; - if (*fmt == 'n') err |= ios_base::failbit; - is.setstate(err); - break; - } - if (isspace(ic)) { - (void)is.get(); - } else if (*fmt == 'n') - is.setstate(ios_base::failbit); - } else - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - break; - case 'p': - if (command) { - if (modified == CharT{}) { - int tp = not_a_ampm; -#if !ONLY_C_LOCALE - tm = std::tm{}; - tm.tm_hour = 1; - ios_base::iostate err = ios_base::goodbit; - f.get(is, nullptr, is, err, &tm, command, fmt + 1); - is.setstate(err); - if (tm.tm_hour == 1) - tp = 0; - else if (tm.tm_hour == 13) - tp = 1; - else - is.setstate(err); -#else - auto nm = detail::ampm_names(); - auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first; - tp = i; -#endif - checked_set(p, tp, not_a_ampm, is); - } else - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - - break; - case 'r': - if (command) { - if (modified == CharT{}) { -#if !ONLY_C_LOCALE - ios_base::iostate err = ios_base::goodbit; - f.get(is, nullptr, is, err, &tm, command, fmt + 1); - if ((err & ios::failbit) == 0) { - checked_set(H, tm.tm_hour, not_a_hour, is); - checked_set(M, tm.tm_min, not_a_hour, is); - checked_set(s, duration_cast(seconds{tm.tm_sec}), not_a_second, - is); - } - is.setstate(err); -#else - // "%I:%M:%S %p" - using dfs = detail::decimal_format_seconds; - CONSTDATA auto w = Duration::period::den == 1 ? 
2 : 3 + dfs::width; - long double S; - int tI = not_a_hour_12_value; - int tM = not_a_minute; - read(is, ru{tI, 1, 2}, CharT{':'}, ru{tM, 1, 2}, CharT{':'}, rld{S, 1, w}); - checked_set(I, tI, not_a_hour_12_value, is); - checked_set(M, tM, not_a_minute, is); - checked_set(s, round(duration{S}), not_a_second, is); - ws(is); - auto nm = detail::ampm_names(); - auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first; - checked_set(p, static_cast(i), not_a_ampm, is); -#endif - } else - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - break; - case 'R': - if (command) { - if (modified == CharT{}) { - int tH = not_a_hour; - int tM = not_a_minute; - read(is, ru{tH, 1, 2}, CharT{'\0'}, CharT{':'}, CharT{'\0'}, ru{tM, 1, 2}, - CharT{'\0'}); - checked_set(H, tH, not_a_hour, is); - checked_set(M, tM, not_a_minute, is); - } else - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - break; - case 'S': - if (command) { -#if !ONLY_C_LOCALE - if (modified == CharT{}) -#else - if (modified != CharT{'E'}) -#endif - { - using dfs = detail::decimal_format_seconds; - CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width; - long double S; - read(is, rld{S, 1, width == -1 ? w : static_cast(width)}); - checked_set(s, round(duration{S}), not_a_second, is); - } -#if !ONLY_C_LOCALE - else if (modified == CharT{'O'}) { - ios_base::iostate err = ios_base::goodbit; - f.get(is, nullptr, is, err, &tm, command, fmt + 1); - if ((err & ios::failbit) == 0) - checked_set(s, duration_cast(seconds{tm.tm_sec}), not_a_second, - is); - is.setstate(err); - } -#endif - else - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - break; - case 'T': - if (command) { - if (modified == CharT{}) { - using dfs = detail::decimal_format_seconds; - CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width; - int tH = not_a_hour; - int tM = not_a_minute; - long double S; - read(is, ru{tH, 1, 2}, CharT{':'}, ru{tM, 1, 2}, CharT{':'}, rld{S, 1, w}); - checked_set(H, tH, not_a_hour, is); - checked_set(M, tM, not_a_minute, is); - checked_set(s, round(duration{S}), not_a_second, is); - } else - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - break; - case 'Y': - if (command) { -#if !ONLY_C_LOCALE - if (modified == CharT{}) -#else - if (modified != CharT{'O'}) -#endif - { - int tY = not_a_year; - read(is, rs{tY, 1, width == -1 ? 4u : static_cast(width)}); - checked_set(Y, tY, not_a_year, is); - } -#if !ONLY_C_LOCALE - else if (modified == CharT{'E'}) { - ios_base::iostate err = ios_base::goodbit; - f.get(is, nullptr, is, err, &tm, command, fmt + 1); - if ((err & ios::failbit) == 0) - checked_set(Y, tm.tm_year + 1900, not_a_year, is); - is.setstate(err); - } -#endif - else - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - break; - case 'y': - if (command) { -#if !ONLY_C_LOCALE - if (modified == CharT{}) -#endif - { - int ty = not_a_2digit_year; - read(is, ru{ty, 1, width == -1 ? 
2u : static_cast(width)}); - checked_set(y, ty, not_a_2digit_year, is); - } -#if !ONLY_C_LOCALE - else { - ios_base::iostate err = ios_base::goodbit; - f.get(is, nullptr, is, err, &tm, command, fmt + 1); - if ((err & ios::failbit) == 0) - checked_set(Y, tm.tm_year + 1900, not_a_year, is); - is.setstate(err); - } -#endif - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - break; - case 'g': - if (command) { - if (modified == CharT{}) { - int tg = not_a_2digit_year; - read(is, ru{tg, 1, width == -1 ? 2u : static_cast(width)}); - checked_set(g, tg, not_a_2digit_year, is); - } else - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - break; - case 'G': - if (command) { - if (modified == CharT{}) { - int tG = not_a_year; - read(is, rs{tG, 1, width == -1 ? 4u : static_cast(width)}); - checked_set(G, tG, not_a_year, is); - } else - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - break; - case 'U': - if (command) { - if (modified == CharT{}) { - int tU = not_a_week_num; - read(is, ru{tU, 1, width == -1 ? 2u : static_cast(width)}); - checked_set(U, tU, not_a_week_num, is); - } else - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - break; - case 'V': - if (command) { - if (modified == CharT{}) { - int tV = not_a_week_num; - read(is, ru{tV, 1, width == -1 ? 2u : static_cast(width)}); - checked_set(V, tV, not_a_week_num, is); - } else - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - break; - case 'W': - if (command) { - if (modified == CharT{}) { - int tW = not_a_week_num; - read(is, ru{tW, 1, width == -1 ? 
2u : static_cast(width)}); - checked_set(W, tW, not_a_week_num, is); - } else - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - break; - case 'E': - case 'O': - if (command) { - if (modified == CharT{}) { - modified = *fmt; - } else { - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } - } else - read(is, *fmt); - break; - case '%': - if (command) { - if (modified == CharT{}) - read(is, *fmt); - else - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } else - command = fmt; - break; - case 'z': - if (command) { - int tH, tM; - minutes toff = not_a_offset; - bool neg = false; - auto ic = is.peek(); - if (!Traits::eq_int_type(ic, Traits::eof())) { - auto c = static_cast(Traits::to_char_type(ic)); - if (c == '-') neg = true; - } - if (modified == CharT{}) { - read(is, rs{tH, 2, 2}); - if (!is.fail()) toff = hours{std::abs(tH)}; - if (is.good()) { - ic = is.peek(); - if (!Traits::eq_int_type(ic, Traits::eof())) { - auto c = static_cast(Traits::to_char_type(ic)); - if ('0' <= c && c <= '9') { - read(is, ru{tM, 2, 2}); - if (!is.fail()) toff += minutes{tM}; - } - } - } - } else { - read(is, rs{tH, 1, 2}); - if (!is.fail()) toff = hours{std::abs(tH)}; - if (is.good()) { - ic = is.peek(); - if (!Traits::eq_int_type(ic, Traits::eof())) { - auto c = static_cast(Traits::to_char_type(ic)); - if (c == ':') { - (void)is.get(); - read(is, ru{tM, 2, 2}); - if (!is.fail()) toff += minutes{tM}; - } - } - } - } - if (neg) toff = -toff; - checked_set(temp_offset, toff, not_a_offset, is); - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - break; - case 'Z': - if (command) { - if (modified == CharT{}) { - std::basic_string buf; - while (is.rdstate() == std::ios::goodbit) { - auto i = is.rdbuf()->sgetc(); - if (Traits::eq_int_type(i, Traits::eof())) { - is.setstate(ios::eofbit); - break; - } - auto wc = Traits::to_char_type(i); - auto c = static_cast(wc); - // is c a valid time zone name or abbreviation character? 
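- // A zone token accepts printable ASCII only (code points strictly between
- // 1 and 127), further restricted to alphanumerics plus '_', '/', '-' and '+';
- // that covers IANA-style names such as "America/New_York" as well as
- // abbreviations such as "UTC". Any other character terminates the token.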
- if (!(CharT{1} < wc && wc < CharT{127}) || - !(isalnum(c) || c == '_' || c == '/' || c == '-' || c == '+')) - break; - buf.push_back(c); - is.rdbuf()->sbumpc(); - } - if (buf.empty()) is.setstate(ios::failbit); - checked_set(temp_abbrev, buf, {}, is); - } else - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } else - read(is, *fmt); - break; - default: - if (command) { - if (width == -1 && modified == CharT{} && '0' <= *fmt && *fmt <= '9') { - width = static_cast(*fmt) - '0'; - while ('0' <= fmt[1] && fmt[1] <= '9') - width = 10 * width + static_cast(*++fmt) - '0'; - } else { - if (modified == CharT{}) - read(is, CharT{'%'}, width, *fmt); - else - read(is, CharT{'%'}, width, modified, *fmt); - command = nullptr; - width = -1; - modified = CharT{}; - } - } else // !command - { - if (isspace(static_cast(*fmt))) - ws(is); // space matches 0 or more white space characters - else - read(is, *fmt); - } - break; - } - } - // is.rdstate() != ios::goodbit || *fmt == CharT{} - if (is.rdstate() == ios::goodbit && command) { - if (modified == CharT{}) - read(is, CharT{'%'}, width); - else - read(is, CharT{'%'}, width, modified); - } - if (is.rdstate() != ios::goodbit && *fmt != CharT{} && !is.fail()) - is.setstate(ios::failbit); - if (!is.fail()) { - if (y != not_a_2digit_year) { - // Convert y and an optional C to Y - if (!(0 <= y && y <= 99)) goto broken; - if (C == not_a_century) { - if (Y == not_a_year) { - if (y >= 69) - C = 19; - else - C = 20; - } else { - C = (Y >= 0 ? Y : Y - 100) / 100; - } - } - int tY; - if (C >= 0) - tY = 100 * C + y; - else - tY = 100 * (C + 1) - (y == 0 ? 100 : y); - if (Y != not_a_year && Y != tY) goto broken; - Y = tY; - } - if (g != not_a_2digit_year) { - // Convert g and an optional C to G - if (!(0 <= g && g <= 99)) goto broken; - if (C == not_a_century) { - if (G == not_a_year) { - if (g >= 69) - C = 19; - else - C = 20; - } else { - C = (G >= 0 ? G : G - 100) / 100; - } - } - int tG; - if (C >= 0) - tG = 100 * C + g; - else - tG = 100 * (C + 1) - (g == 0 ? 
100 : g); - if (G != not_a_year && G != tG) goto broken; - G = tG; - } - if (Y < static_cast(year::min()) || Y > static_cast(year::max())) - Y = not_a_year; - bool computed = false; - if (G != not_a_year && V != not_a_week_num && wd != not_a_weekday) { - year_month_day ymd_trial = sys_days(year{G - 1} / December / Thursday[last]) + - (Monday - Thursday) + weeks{V - 1} + - (weekday{static_cast(wd)} - Monday); - if (Y == not_a_year) - Y = static_cast(ymd_trial.year()); - else if (year{Y} != ymd_trial.year()) - goto broken; - if (m == not_a_month) - m = static_cast(static_cast(ymd_trial.month())); - else if (month(static_cast(m)) != ymd_trial.month()) - goto broken; - if (d == not_a_day) - d = static_cast(static_cast(ymd_trial.day())); - else if (day(static_cast(d)) != ymd_trial.day()) - goto broken; - computed = true; - } - if (Y != not_a_year && U != not_a_week_num && wd != not_a_weekday) { - year_month_day ymd_trial = sys_days(year{Y} / January / Sunday[1]) + - weeks{U - 1} + - (weekday{static_cast(wd)} - Sunday); - if (Y == not_a_year) - Y = static_cast(ymd_trial.year()); - else if (year{Y} != ymd_trial.year()) - goto broken; - if (m == not_a_month) - m = static_cast(static_cast(ymd_trial.month())); - else if (month(static_cast(m)) != ymd_trial.month()) - goto broken; - if (d == not_a_day) - d = static_cast(static_cast(ymd_trial.day())); - else if (day(static_cast(d)) != ymd_trial.day()) - goto broken; - computed = true; - } - if (Y != not_a_year && W != not_a_week_num && wd != not_a_weekday) { - year_month_day ymd_trial = sys_days(year{Y} / January / Monday[1]) + - weeks{W - 1} + - (weekday{static_cast(wd)} - Monday); - if (Y == not_a_year) - Y = static_cast(ymd_trial.year()); - else if (year{Y} != ymd_trial.year()) - goto broken; - if (m == not_a_month) - m = static_cast(static_cast(ymd_trial.month())); - else if (month(static_cast(m)) != ymd_trial.month()) - goto broken; - if (d == not_a_day) - d = static_cast(static_cast(ymd_trial.day())); - else if (day(static_cast(d)) != ymd_trial.day()) - goto broken; - computed = true; - } - if (j != 0 && Y != not_a_year) { - auto ymd_trial = year_month_day{local_days(year{Y} / 1 / 1) + days{j - 1}}; - if (m == 0) - m = static_cast(static_cast(ymd_trial.month())); - else if (month(static_cast(m)) != ymd_trial.month()) - goto broken; - if (d == 0) - d = static_cast(static_cast(ymd_trial.day())); - else if (day(static_cast(d)) != ymd_trial.day()) - goto broken; - } - auto ymd = year{Y} / m / d; - if (ymd.ok()) { - if (wd == not_a_weekday) - wd = static_cast((weekday(sys_days(ymd)) - Sunday).count()); - else if (wd != static_cast((weekday(sys_days(ymd)) - Sunday).count())) - goto broken; - if (!computed) { - if (G != not_a_year || V != not_a_week_num) { - sys_days sd = ymd; - auto G_trial = year_month_day{sd + days{3}}.year(); - auto start = sys_days((G_trial - years{1}) / December / Thursday[last]) + - (Monday - Thursday); - if (sd < start) { - --G_trial; - if (V != not_a_week_num) - start = sys_days((G_trial - years{1}) / December / Thursday[last]) + - (Monday - Thursday); - } - if (G != not_a_year && G != static_cast(G_trial)) goto broken; - if (V != not_a_week_num) { - auto V_trial = duration_cast(sd - start).count() + 1; - if (V != V_trial) goto broken; - } - } - if (U != not_a_week_num) { - auto start = sys_days(Sunday[1] / January / ymd.year()); - auto U_trial = floor(sys_days(ymd) - start).count() + 1; - if (U != U_trial) goto broken; - } - if (W != not_a_week_num) { - auto start = sys_days(Monday[1] / January / ymd.year()); - auto W_trial = 
floor(sys_days(ymd) - start).count() + 1; - if (W != W_trial) goto broken; - } - } - } - fds.ymd = ymd; - if (I != not_a_hour_12_value) { - if (!(1 <= I && I <= 12)) goto broken; - if (p != not_a_ampm) { - // p is in [0, 1] == [AM, PM] - // Store trial H in I - if (I == 12) --p; - I += p * 12; - // Either set H from I or make sure H and I are consistent - if (H == not_a_hour) - H = I; - else if (I != H) - goto broken; - } else // p == not_a_ampm - { - // if H, make sure H and I could be consistent - if (H != not_a_hour) { - if (I == 12) { - if (H != 0 && H != 12) goto broken; - } else if (!(I == H || I == H + 12)) { - goto broken; - } - } - } - } - if (H != not_a_hour) { - fds.has_tod = true; - fds.tod = time_of_day{hours{H}}; - } - if (M != not_a_minute) { - fds.has_tod = true; - fds.tod.m_ = minutes{M}; - } - if (s != not_a_second) { - fds.has_tod = true; - fds.tod.s_ = detail::decimal_format_seconds{s}; - } - if (wd != not_a_weekday) fds.wd = weekday{static_cast(wd)}; - if (abbrev != nullptr) *abbrev = std::move(temp_abbrev); - if (offset != nullptr && temp_offset != not_a_offset) *offset = temp_offset; - } - return is; - } -broken: - is.setstate(ios_base::failbit); - return is; -} - -template > -std::basic_istream& from_stream( - std::basic_istream& is, const CharT* fmt, year& y, - std::basic_string* abbrev = nullptr, - std::chrono::minutes* offset = nullptr) { - using namespace std; - using namespace std::chrono; - using CT = seconds; - fields fds{}; - from_stream(is, fmt, fds, abbrev, offset); - if (!fds.ymd.year().ok()) is.setstate(ios::failbit); - if (!is.fail()) y = fds.ymd.year(); - return is; -} - -template > -std::basic_istream& from_stream( - std::basic_istream& is, const CharT* fmt, month& m, - std::basic_string* abbrev = nullptr, - std::chrono::minutes* offset = nullptr) { - using namespace std; - using namespace std::chrono; - using CT = seconds; - fields fds{}; - from_stream(is, fmt, fds, abbrev, offset); - if (!fds.ymd.month().ok()) is.setstate(ios::failbit); - if (!is.fail()) m = fds.ymd.month(); - return is; -} - -template > -std::basic_istream& from_stream( - std::basic_istream& is, const CharT* fmt, day& d, - std::basic_string* abbrev = nullptr, - std::chrono::minutes* offset = nullptr) { - using namespace std; - using namespace std::chrono; - using CT = seconds; - fields fds{}; - from_stream(is, fmt, fds, abbrev, offset); - if (!fds.ymd.day().ok()) is.setstate(ios::failbit); - if (!is.fail()) d = fds.ymd.day(); - return is; -} - -template > -std::basic_istream& from_stream( - std::basic_istream& is, const CharT* fmt, weekday& wd, - std::basic_string* abbrev = nullptr, - std::chrono::minutes* offset = nullptr) { - using namespace std; - using namespace std::chrono; - using CT = seconds; - fields fds{}; - from_stream(is, fmt, fds, abbrev, offset); - if (!fds.wd.ok()) is.setstate(ios::failbit); - if (!is.fail()) wd = fds.wd; - return is; -} - -template > -std::basic_istream& from_stream( - std::basic_istream& is, const CharT* fmt, year_month& ym, - std::basic_string* abbrev = nullptr, - std::chrono::minutes* offset = nullptr) { - using namespace std; - using namespace std::chrono; - using CT = seconds; - fields fds{}; - from_stream(is, fmt, fds, abbrev, offset); - if (!fds.ymd.month().ok()) is.setstate(ios::failbit); - if (!is.fail()) ym = fds.ymd.year() / fds.ymd.month(); - return is; -} - -template > -std::basic_istream& from_stream( - std::basic_istream& is, const CharT* fmt, month_day& md, - std::basic_string* abbrev = nullptr, - std::chrono::minutes* offset = 
nullptr) { - using namespace std; - using namespace std::chrono; - using CT = seconds; - fields fds{}; - from_stream(is, fmt, fds, abbrev, offset); - if (!fds.ymd.month().ok() || !fds.ymd.day().ok()) is.setstate(ios::failbit); - if (!is.fail()) md = fds.ymd.month() / fds.ymd.day(); - return is; -} - -template > -std::basic_istream& from_stream( - std::basic_istream& is, const CharT* fmt, year_month_day& ymd, - std::basic_string* abbrev = nullptr, - std::chrono::minutes* offset = nullptr) { - using namespace std; - using namespace std::chrono; - using CT = seconds; - fields fds{}; - from_stream(is, fmt, fds, abbrev, offset); - if (!fds.ymd.ok()) is.setstate(ios::failbit); - if (!is.fail()) ymd = fds.ymd; - return is; -} - -template > -std::basic_istream& from_stream( - std::basic_istream& is, const CharT* fmt, sys_time& tp, - std::basic_string* abbrev = nullptr, - std::chrono::minutes* offset = nullptr) { - using namespace std; - using namespace std::chrono; - using CT = typename common_type::type; - minutes offset_local{}; - auto offptr = offset ? offset : &offset_local; - fields fds{}; - fds.has_tod = true; - from_stream(is, fmt, fds, abbrev, offptr); - if (!fds.ymd.ok() || !fds.tod.in_conventional_range()) is.setstate(ios::failbit); - if (!is.fail()) - tp = round(sys_days(fds.ymd) - *offptr + fds.tod.to_duration()); - return is; -} - -template > -std::basic_istream& from_stream( - std::basic_istream& is, const CharT* fmt, local_time& tp, - std::basic_string* abbrev = nullptr, - std::chrono::minutes* offset = nullptr) { - using namespace std; - using namespace std::chrono; - using CT = typename common_type::type; - fields fds{}; - fds.has_tod = true; - from_stream(is, fmt, fds, abbrev, offset); - if (!fds.ymd.ok() || !fds.tod.in_conventional_range()) is.setstate(ios::failbit); - if (!is.fail()) - tp = round(local_seconds{local_days(fds.ymd)} + fds.tod.to_duration()); - return is; -} - -template > -std::basic_istream& from_stream( - std::basic_istream& is, const CharT* fmt, - std::chrono::duration& d, - std::basic_string* abbrev = nullptr, - std::chrono::minutes* offset = nullptr) { - using namespace std; - using namespace std::chrono; - using Duration = std::chrono::duration; - using CT = typename common_type::type; - fields fds{}; - from_stream(is, fmt, fds, abbrev, offset); - if (!fds.has_tod) is.setstate(ios::failbit); - if (!is.fail()) d = duration_cast(fds.tod.to_duration()); - return is; -} - -template , - class Alloc = std::allocator> -struct parse_manip { - const std::basic_string format_; - Parsable& tp_; - std::basic_string* abbrev_; - std::chrono::minutes* offset_; - - public: - parse_manip(std::basic_string format, Parsable& tp, - std::basic_string* abbrev = nullptr, - std::chrono::minutes* offset = nullptr) - : format_(std::move(format)), tp_(tp), abbrev_(abbrev), offset_(offset) {} -}; - -template -std::basic_istream& operator>>( - std::basic_istream& is, - const parse_manip& x) { - return from_stream(is, x.format_.c_str(), x.tp_, x.abbrev_, x.offset_); -} - -template -inline auto parse(const std::basic_string& format, Parsable& tp) - -> decltype(from_stream(std::declval&>(), - format.c_str(), tp), - parse_manip{format, tp}) { - return {format, tp}; -} - -template -inline auto parse(const std::basic_string& format, Parsable& tp, - std::basic_string& abbrev) - -> decltype(from_stream(std::declval&>(), - format.c_str(), tp, &abbrev), - parse_manip{format, tp, &abbrev}) { - return {format, tp, &abbrev}; -} - -template -inline auto parse(const std::basic_string& format, 
Parsable& tp, - std::chrono::minutes& offset) - -> decltype(from_stream(std::declval&>(), - format.c_str(), tp, - std::declval*>(), - &offset), - parse_manip{format, tp, nullptr, - &offset}) { - return {format, tp, nullptr, &offset}; -} - -template -inline auto parse(const std::basic_string& format, Parsable& tp, - std::basic_string& abbrev, - std::chrono::minutes& offset) - -> decltype(from_stream(std::declval&>(), - format.c_str(), tp, &abbrev, &offset), - parse_manip{format, tp, &abbrev, - &offset}) { - return {format, tp, &abbrev, &offset}; -} - -// const CharT* formats - -template -inline auto parse(const CharT* format, Parsable& tp) - -> decltype(from_stream(std::declval&>(), format, tp), - parse_manip{format, tp}) { - return {format, tp}; -} - -template -inline auto parse(const CharT* format, Parsable& tp, - std::basic_string& abbrev) - -> decltype(from_stream(std::declval&>(), format, - tp, &abbrev), - parse_manip{format, tp, &abbrev}) { - return {format, tp, &abbrev}; -} - -template -inline auto parse(const CharT* format, Parsable& tp, std::chrono::minutes& offset) - -> decltype(from_stream(std::declval&>(), format, tp, - std::declval*>(), &offset), - parse_manip{format, tp, nullptr, &offset}) { - return {format, tp, nullptr, &offset}; -} - -template -inline auto parse(const CharT* format, Parsable& tp, - std::basic_string& abbrev, - std::chrono::minutes& offset) - -> decltype(from_stream(std::declval&>(), format, - tp, &abbrev, &offset), - parse_manip{format, tp, &abbrev, - &offset}) { - return {format, tp, &abbrev, &offset}; -} - -// duration streaming - -namespace detail { - -template -class string_literal; - -template -inline CONSTCD14 string_literal< - typename std::conditional::type, - N1 + N2 - 1> -operator+(const string_literal& x, - const string_literal& y) NOEXCEPT; - -template -class string_literal { - CharT p_[N]; - - CONSTCD11 string_literal() NOEXCEPT : p_{} {} - - public: - using const_iterator = const CharT*; - - string_literal(string_literal const&) = default; - string_literal& operator=(string_literal const&) = delete; - - template ::type> - CONSTCD11 string_literal(CharT c) NOEXCEPT : p_{c} {} - - template ::type> - CONSTCD11 string_literal(CharT c1, CharT c2) NOEXCEPT : p_{c1, c2} {} - - template ::type> - CONSTCD11 string_literal(CharT c1, CharT c2, CharT c3) NOEXCEPT : p_{c1, c2, c3} {} - - CONSTCD14 string_literal(const CharT (&a)[N]) NOEXCEPT : p_{} { - for (std::size_t i = 0; i < N; ++i) p_[i] = a[i]; - } - - template ::type> - CONSTCD14 string_literal(const char (&a)[N]) NOEXCEPT : p_{} { - for (std::size_t i = 0; i < N; ++i) p_[i] = a[i]; - } - - template ::value>::type> - CONSTCD14 string_literal(string_literal const& a) NOEXCEPT : p_{} { - for (std::size_t i = 0; i < N; ++i) p_[i] = a[i]; - } - - CONSTCD11 const CharT* data() const NOEXCEPT { return p_; } - CONSTCD11 std::size_t size() const NOEXCEPT { return N - 1; } - - CONSTCD11 const_iterator begin() const NOEXCEPT { return p_; } - CONSTCD11 const_iterator end() const NOEXCEPT { return p_ + N - 1; } - - CONSTCD11 CharT const& operator[](std::size_t n) const NOEXCEPT { return p_[n]; } - - template - friend std::basic_ostream& operator<<( - std::basic_ostream& os, const string_literal& s) { - return os << s.p_; - } - - template - friend CONSTCD14 string_literal< - typename std::conditional::type, - N1 + N2 - 1> - operator+(const string_literal& x, - const string_literal& y) NOEXCEPT; -}; - -template -CONSTCD11 inline string_literal operator+( - const string_literal& x, const string_literal& y) 
NOEXCEPT { - return string_literal(x[0], y[0]); -} - -template -CONSTCD11 inline string_literal operator+( - const string_literal& x, const string_literal& y) NOEXCEPT { - return string_literal(x[0], x[1], y[0]); -} - -template -CONSTCD14 inline string_literal< - typename std::conditional::type, - N1 + N2 - 1> -operator+(const string_literal& x, - const string_literal& y) NOEXCEPT { - using CT = - typename std::conditional::type; - - string_literal r; - std::size_t i = 0; - for (; i < N1 - 1; ++i) r.p_[i] = CT(x.p_[i]); - for (std::size_t j = 0; j < N2; ++j, ++i) r.p_[i] = CT(y.p_[j]); - - return r; -} - -template -inline std::basic_string operator+( - std::basic_string x, const string_literal& y) { - x.append(y.data(), y.size()); - return x; -} - -#if __cplusplus >= 201402 && (!defined(__EDG_VERSION__) || __EDG_VERSION__ > 411) && \ - (!defined(__SUNPRO_CC) || __SUNPRO_CC > 0x5150) - -template {} || std::is_same{} || - std::is_same{} || std::is_same{}>> -CONSTCD14 inline string_literal msl(CharT c) NOEXCEPT { - return string_literal{c}; -} - -CONSTCD14 -inline std::size_t to_string_len(std::intmax_t i) { - std::size_t r = 0; - do { - i /= 10; - ++r; - } while (i > 0); - return r; -} - -template - CONSTCD14 inline std::enable_if_t < - N<10, string_literal> msl() NOEXCEPT { - return msl(char(N % 10 + '0')); -} - -template -CONSTCD14 inline std::enable_if_t<10 <= N, string_literal> -msl() NOEXCEPT { - return msl() + msl(char(N % 10 + '0')); -} - -template -CONSTCD14 inline std::enable_if_t< - std::ratio::type::den != 1, - string_literal::type::num) + - to_string_len(std::ratio::type::den) + 4>> -msl(std::ratio) NOEXCEPT { - using R = typename std::ratio::type; - return msl(CharT{'['}) + msl() + msl(CharT{'/'}) + msl() + - msl(CharT{']'}); -} - -template -CONSTCD14 inline std::enable_if_t< - std::ratio::type::den == 1, - string_literal::type::num) + 3>> -msl(std::ratio) NOEXCEPT { - using R = typename std::ratio::type; - return msl(CharT{'['}) + msl() + msl(CharT{']'}); -} - -#else // __cplusplus < 201402 || (defined(__EDG_VERSION__) && __EDG_VERSION__ <= 411) - -inline std::string to_string(std::uint64_t x) { return std::to_string(x); } - -template -inline std::basic_string to_string(std::uint64_t x) { - auto y = std::to_string(x); - return std::basic_string(y.begin(), y.end()); -} - -template -inline typename std::enable_if::type::den != 1, - std::basic_string>::type -msl(std::ratio) { - using R = typename std::ratio::type; - return std::basic_string(1, '[') + to_string(R::num) + CharT{'/'} + - to_string(R::den) + CharT{']'}; -} - -template -inline typename std::enable_if::type::den == 1, - std::basic_string>::type -msl(std::ratio) { - using R = typename std::ratio::type; - return std::basic_string(1, '[') + to_string(R::num) + CharT{']'}; -} - -#endif // __cplusplus < 201402 || (defined(__EDG_VERSION__) && __EDG_VERSION__ <= 411) - -template -CONSTCD11 inline string_literal msl(std::atto) NOEXCEPT { - return string_literal{'a'}; -} - -template -CONSTCD11 inline string_literal msl(std::femto) NOEXCEPT { - return string_literal{'f'}; -} - -template -CONSTCD11 inline string_literal msl(std::pico) NOEXCEPT { - return string_literal{'p'}; -} - -template -CONSTCD11 inline string_literal msl(std::nano) NOEXCEPT { - return string_literal{'n'}; -} - -template -CONSTCD11 inline typename std::enable_if::value, - string_literal>::type -msl(std::micro) NOEXCEPT { - return string_literal{'\xC2', '\xB5'}; -} - -template -CONSTCD11 inline typename std::enable_if::value, - string_literal>::type 
-msl(std::micro) NOEXCEPT { - return string_literal{CharT{static_cast('\xB5')}}; -} - -template -CONSTCD11 inline string_literal msl(std::milli) NOEXCEPT { - return string_literal{'m'}; -} - -template -CONSTCD11 inline string_literal msl(std::centi) NOEXCEPT { - return string_literal{'c'}; -} - -template -CONSTCD11 inline string_literal msl(std::deca) NOEXCEPT { - return string_literal{'d', 'a'}; -} - -template -CONSTCD11 inline string_literal msl(std::deci) NOEXCEPT { - return string_literal{'d'}; -} - -template -CONSTCD11 inline string_literal msl(std::hecto) NOEXCEPT { - return string_literal{'h'}; -} - -template -CONSTCD11 inline string_literal msl(std::kilo) NOEXCEPT { - return string_literal{'k'}; -} - -template -CONSTCD11 inline string_literal msl(std::mega) NOEXCEPT { - return string_literal{'M'}; -} - -template -CONSTCD11 inline string_literal msl(std::giga) NOEXCEPT { - return string_literal{'G'}; -} - -template -CONSTCD11 inline string_literal msl(std::tera) NOEXCEPT { - return string_literal{'T'}; -} - -template -CONSTCD11 inline string_literal msl(std::peta) NOEXCEPT { - return string_literal{'P'}; -} - -template -CONSTCD11 inline string_literal msl(std::exa) NOEXCEPT { - return string_literal{'E'}; -} - -template -CONSTCD11 inline auto get_units(Period p) - -> decltype(msl(p) + string_literal{'s'}) { - return msl(p) + string_literal{'s'}; -} - -template -CONSTCD11 inline string_literal get_units(std::ratio<1>) { - return string_literal{'s'}; -} - -template -CONSTCD11 inline string_literal get_units(std::ratio<3600>) { - return string_literal{'h'}; -} - -template -CONSTCD11 inline string_literal get_units(std::ratio<60>) { - return string_literal{'m', 'i', 'n'}; -} - -template -CONSTCD11 inline string_literal get_units(std::ratio<86400>) { - return string_literal{'d'}; -} - -template > -struct make_string; - -template <> -struct make_string { - template - static std::string from(Rep n) { - return std::to_string(n); - } -}; - -template -struct make_string { - template - static std::basic_string from(Rep n) { - auto s = std::to_string(n); - return std::basic_string(s.begin(), s.end()); - } -}; - -template <> -struct make_string { - template - static std::wstring from(Rep n) { - return std::to_wstring(n); - } -}; - -template -struct make_string { - template - static std::basic_string from(Rep n) { - auto s = std::to_wstring(n); - return std::basic_string(s.begin(), s.end()); - } -}; - -} // namespace detail - -template -inline std::basic_ostream& operator<<( - std::basic_ostream& os, const std::chrono::duration& d) { - using namespace detail; - return os << make_string::from(d.count()) + - get_units(typename Period::type{}); -} - -} // namespace date - -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif - -#endif // DATE_H diff --git a/cpp/src/arrow/util/decimal-test.cc b/cpp/src/arrow/util/decimal-test.cc index 94c270280ea3c..73ac48cf88f20 100644 --- a/cpp/src/arrow/util/decimal-test.cc +++ b/cpp/src/arrow/util/decimal-test.cc @@ -417,8 +417,8 @@ TEST(Decimal128Test, TestFromBigEndian) { auto negated = -value; little_endian = negated.ToBytes(); std::reverse(little_endian.begin(), little_endian.end()); - // Convert all of the bytes since we have to include the sign bit - ASSERT_OK(Decimal128::FromBigEndian(little_endian.data(), 16, &out)); + // The sign bit is looked up in the MSB + ASSERT_OK(Decimal128::FromBigEndian(little_endian.data() + 15 - ii, ii + 1, &out)); ASSERT_EQ(negated, out); // Take the complement and convert to big endian @@ -466,4 +466,108 @@ 
TEST(Decimal128Test, TestToInteger) {
   ASSERT_RAISES(Invalid, invalid_int64.ToInteger(&out2));
 }
 
+TEST(Decimal128Test, GetWholeAndFraction) {
+  Decimal128 value("123456");
+  Decimal128 whole;
+  Decimal128 fraction;
+  int32_t out;
+
+  value.GetWholeAndFraction(0, &whole, &fraction);
+  ASSERT_OK(whole.ToInteger(&out));
+  ASSERT_EQ(123456, out);
+  ASSERT_OK(fraction.ToInteger(&out));
+  ASSERT_EQ(0, out);
+
+  value.GetWholeAndFraction(1, &whole, &fraction);
+  ASSERT_OK(whole.ToInteger(&out));
+  ASSERT_EQ(12345, out);
+  ASSERT_OK(fraction.ToInteger(&out));
+  ASSERT_EQ(6, out);
+
+  value.GetWholeAndFraction(5, &whole, &fraction);
+  ASSERT_OK(whole.ToInteger(&out));
+  ASSERT_EQ(1, out);
+  ASSERT_OK(fraction.ToInteger(&out));
+  ASSERT_EQ(23456, out);
+
+  value.GetWholeAndFraction(7, &whole, &fraction);
+  ASSERT_OK(whole.ToInteger(&out));
+  ASSERT_EQ(0, out);
+  ASSERT_OK(fraction.ToInteger(&out));
+  ASSERT_EQ(123456, out);
+}
+
+TEST(Decimal128Test, GetWholeAndFractionNegative) {
+  Decimal128 value("-123456");
+  Decimal128 whole;
+  Decimal128 fraction;
+  int32_t out;
+
+  value.GetWholeAndFraction(0, &whole, &fraction);
+  ASSERT_OK(whole.ToInteger(&out));
+  ASSERT_EQ(-123456, out);
+  ASSERT_OK(fraction.ToInteger(&out));
+  ASSERT_EQ(0, out);
+
+  value.GetWholeAndFraction(1, &whole, &fraction);
+  ASSERT_OK(whole.ToInteger(&out));
+  ASSERT_EQ(-12345, out);
+  ASSERT_OK(fraction.ToInteger(&out));
+  ASSERT_EQ(-6, out);
+
+  value.GetWholeAndFraction(5, &whole, &fraction);
+  ASSERT_OK(whole.ToInteger(&out));
+  ASSERT_EQ(-1, out);
+  ASSERT_OK(fraction.ToInteger(&out));
+  ASSERT_EQ(-23456, out);
+
+  value.GetWholeAndFraction(7, &whole, &fraction);
+  ASSERT_OK(whole.ToInteger(&out));
+  ASSERT_EQ(0, out);
+  ASSERT_OK(fraction.ToInteger(&out));
+  ASSERT_EQ(-123456, out);
+}
+
+TEST(Decimal128Test, IncreaseScale) {
+  Decimal128 result;
+  int32_t out;
+
+  result = Decimal128("1234").IncreaseScaleBy(3);
+  ASSERT_OK(result.ToInteger(&out));
+  ASSERT_EQ(1234000, out);
+
+  result = Decimal128("-1234").IncreaseScaleBy(3);
+  ASSERT_OK(result.ToInteger(&out));
+  ASSERT_EQ(-1234000, out);
+}
+
+TEST(Decimal128Test, ReduceScaleAndRound) {
+  Decimal128 result;
+  int32_t out;
+
+  result = Decimal128("123456").ReduceScaleBy(1, false);
+  ASSERT_OK(result.ToInteger(&out));
+  ASSERT_EQ(12345, out);
+
+  result = Decimal128("123456").ReduceScaleBy(1, true);
+  ASSERT_OK(result.ToInteger(&out));
+  ASSERT_EQ(12346, out);
+
+  result = Decimal128("123451").ReduceScaleBy(1, true);
+  ASSERT_OK(result.ToInteger(&out));
+  ASSERT_EQ(12345, out);
+
+  result = Decimal128("-123789").ReduceScaleBy(2, true);
+  ASSERT_OK(result.ToInteger(&out));
+  ASSERT_EQ(-1238, out);
+
+  result = Decimal128("-123749").ReduceScaleBy(2, true);
+  ASSERT_OK(result.ToInteger(&out));
+  ASSERT_EQ(-1237, out);
+
+  result = Decimal128("-123750").ReduceScaleBy(2, true);
+  ASSERT_OK(result.ToInteger(&out));
+  ASSERT_EQ(-1238, out);
+}
+
 }  // namespace arrow
high_bits_ must be equal to 0 or -1, got: " - << high_bits_; - return static_cast(low_bits_); + << high_bits(); + return static_cast(low_bits()); } static std::string ToStringNegativeScale(const std::string& str, @@ -337,17 +279,15 @@ bool ParseDecimalComponents(const char* s, size_t size, DecimalComponents* out) } // namespace -Status Decimal128::FromString(const std::string& s, Decimal128* out, int32_t* precision, - int32_t* scale) { +Status Decimal128::FromString(const util::string_view& s, Decimal128* out, + int32_t* precision, int32_t* scale) { if (s.empty()) { return Status::Invalid("Empty string cannot be converted to decimal"); } DecimalComponents dec; if (!ParseDecimalComponents(s.data(), s.size(), &dec)) { - std::stringstream ss; - ss << "The string '" << s << "' is not a valid decimal number"; - return Status::Invalid(ss.str()); + return Status::Invalid("The string '", s, "' is not a valid decimal number"); } std::string exponent_value = dec.exponent_sign + dec.exponent_digits; @@ -381,7 +321,7 @@ Status Decimal128::FromString(const std::string& s, Decimal128* out, int32_t* pr if (scale != nullptr && *scale < 0) { const int32_t abs_scale = std::abs(*scale); - *out *= ScaleMultipliers[abs_scale]; + *out *= GetScaleMultiplier(abs_scale); if (precision != nullptr) { *precision += abs_scale; @@ -393,493 +333,18 @@ Status Decimal128::FromString(const std::string& s, Decimal128* out, int32_t* pr return Status::OK(); } -Decimal128& Decimal128::Negate() { - low_bits_ = ~low_bits_ + 1; - high_bits_ = ~high_bits_; - if (low_bits_ == 0) { - ++high_bits_; - } - return *this; -} - -Decimal128& Decimal128::Abs() { return *this < 0 ? Negate() : *this; } - -Decimal128& Decimal128::operator+=(const Decimal128& right) { - const uint64_t sum = low_bits_ + right.low_bits_; - high_bits_ += right.high_bits_; - if (sum < low_bits_) { - ++high_bits_; - } - low_bits_ = sum; - return *this; -} - -Decimal128& Decimal128::operator-=(const Decimal128& right) { - const uint64_t diff = low_bits_ - right.low_bits_; - high_bits_ -= right.high_bits_; - if (diff > low_bits_) { - --high_bits_; - } - low_bits_ = diff; - return *this; -} - -Decimal128& Decimal128::operator/=(const Decimal128& right) { - Decimal128 remainder; - Status s = Divide(right, this, &remainder); - DCHECK(s.ok()); - return *this; -} - -Decimal128& Decimal128::operator|=(const Decimal128& right) { - low_bits_ |= right.low_bits_; - high_bits_ |= right.high_bits_; - return *this; -} - -Decimal128& Decimal128::operator&=(const Decimal128& right) { - low_bits_ &= right.low_bits_; - high_bits_ &= right.high_bits_; - return *this; -} - -Decimal128& Decimal128::operator<<=(uint32_t bits) { - if (bits != 0) { - if (bits < 64) { - high_bits_ <<= bits; - high_bits_ |= (low_bits_ >> (64 - bits)); - low_bits_ <<= bits; - } else if (bits < 128) { - high_bits_ = static_cast(low_bits_) << (bits - 64); - low_bits_ = 0; - } else { - high_bits_ = 0; - low_bits_ = 0; - } - } - return *this; -} - -Decimal128& Decimal128::operator>>=(uint32_t bits) { - if (bits != 0) { - if (bits < 64) { - low_bits_ >>= bits; - low_bits_ |= static_cast(high_bits_ << (64 - bits)); - high_bits_ = static_cast(static_cast(high_bits_) >> bits); - } else if (bits < 128) { - low_bits_ = static_cast(high_bits_ >> (bits - 64)); - high_bits_ = static_cast(high_bits_ >= 0L ? 0L : -1L); - } else { - high_bits_ = static_cast(high_bits_ >= 0L ? 
0L : -1L); - low_bits_ = static_cast(high_bits_); - } - } - return *this; -} - -Decimal128& Decimal128::operator*=(const Decimal128& right) { - // Break the left and right numbers into 32 bit chunks - // so that we can multiply them without overflow. - const uint64_t L0 = static_cast(high_bits_) >> 32; - const uint64_t L1 = static_cast(high_bits_) & kIntMask; - const uint64_t L2 = low_bits_ >> 32; - const uint64_t L3 = low_bits_ & kIntMask; - - const uint64_t R0 = static_cast(right.high_bits_) >> 32; - const uint64_t R1 = static_cast(right.high_bits_) & kIntMask; - const uint64_t R2 = right.low_bits_ >> 32; - const uint64_t R3 = right.low_bits_ & kIntMask; - - uint64_t product = L3 * R3; - low_bits_ = product & kIntMask; - - uint64_t sum = product >> 32; - - product = L2 * R3; - sum += product; - - product = L3 * R2; - sum += product; - - low_bits_ += sum << 32; - - high_bits_ = static_cast(sum < product ? kCarryBit : 0); - if (sum < product) { - high_bits_ += kCarryBit; - } - - high_bits_ += static_cast(sum >> 32); - high_bits_ += L1 * R3 + L2 * R2 + L3 * R1; - high_bits_ += (L0 * R3 + L1 * R2 + L2 * R1 + L3 * R0) << 32; - return *this; -} - -/// Expands the given value into an array of ints so that we can work on -/// it. The array will be converted to an absolute value and the wasNegative -/// flag will be set appropriately. The array will remove leading zeros from -/// the value. -/// \param array an array of length 4 to set with the value -/// \param was_negative a flag for whether the value was original negative -/// \result the output length of the array -static int64_t FillInArray(const Decimal128& value, uint32_t* array, bool& was_negative) { - uint64_t high; - uint64_t low; - const int64_t highbits = value.high_bits(); - const uint64_t lowbits = value.low_bits(); - - if (highbits < 0) { - low = ~lowbits + 1; - high = static_cast(~highbits); - if (low == 0) { - ++high; - } - was_negative = true; - } else { - low = lowbits; - high = static_cast(highbits); - was_negative = false; - } - - if (high != 0) { - if (high > std::numeric_limits::max()) { - array[0] = static_cast(high >> 32); - array[1] = static_cast(high); - array[2] = static_cast(low >> 32); - array[3] = static_cast(low); - return 4; - } - - array[0] = static_cast(high); - array[1] = static_cast(low >> 32); - array[2] = static_cast(low); - return 3; - } - - if (low >= std::numeric_limits::max()) { - array[0] = static_cast(low >> 32); - array[1] = static_cast(low); - return 2; - } - - if (low == 0) { - return 0; - } - - array[0] = static_cast(low); - return 1; -} - -/// Shift the number in the array left by bits positions. -/// \param array the number to shift, must have length elements -/// \param length the number of entries in the array -/// \param bits the number of bits to shift (0 <= bits < 32) -static void ShiftArrayLeft(uint32_t* array, int64_t length, int64_t bits) { - if (length > 0 && bits != 0) { - for (int64_t i = 0; i < length - 1; ++i) { - array[i] = (array[i] << bits) | (array[i + 1] >> (32 - bits)); - } - array[length - 1] <<= bits; - } -} - -/// Shift the number in the array right by bits positions. 
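// ---------------------------------------------------------------------------
// [Editor's note] The removed operator*= above multiplies two 128-bit values
// by splitting each operand into 32-bit limbs so that every partial product
// fits in 64 bits without overflow. A minimal, self-contained sketch of the
// same limb technique for the simpler 64x64 -> 128 case (names are
// illustrative, not Arrow API):

#include <cstdint>

// Multiply two uint64_t values into a (high, low) 128-bit pair.
inline void MulU64To128(uint64_t a, uint64_t b, uint64_t* hi, uint64_t* lo) {
  const uint64_t a_hi = a >> 32, a_lo = a & 0xFFFFFFFF;
  const uint64_t b_hi = b >> 32, b_lo = b & 0xFFFFFFFF;

  const uint64_t t = a_lo * b_lo;              // bits 0..63
  const uint64_t u = a_hi * b_lo + (t >> 32);  // carries into bits 32..95
  const uint64_t v = a_lo * b_hi + (u & 0xFFFFFFFF);

  *lo = (v << 32) | (t & 0xFFFFFFFF);
  *hi = a_hi * b_hi + (u >> 32) + (v >> 32);
}

// e.g. MulU64To128(UINT64_MAX, 2, &hi, &lo) yields hi == 1, lo == UINT64_MAX - 1,
// i.e. (2^64 - 1) * 2 == 2^65 - 2.
// ---------------------------------------------------------------------------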
-/// \param array the number to shift, must have length elements -/// \param length the number of entries in the array -/// \param bits the number of bits to shift (0 <= bits < 32) -static void ShiftArrayRight(uint32_t* array, int64_t length, int64_t bits) { - if (length > 0 && bits != 0) { - for (int64_t i = length - 1; i > 0; --i) { - array[i] = (array[i] >> bits) | (array[i - 1] << (32 - bits)); - } - array[0] >>= bits; - } -} - -/// \brief Fix the signs of the result and remainder at the end of the division based on -/// the signs of the dividend and divisor. -static void FixDivisionSigns(Decimal128* result, Decimal128* remainder, - bool dividend_was_negative, bool divisor_was_negative) { - if (dividend_was_negative != divisor_was_negative) { - result->Negate(); - } - - if (dividend_was_negative) { - remainder->Negate(); - } -} - -/// \brief Build a Decimal128 from a list of ints. -static Status BuildFromArray(Decimal128* value, uint32_t* array, int64_t length) { - switch (length) { - case 0: - *value = {static_cast(0)}; - break; - case 1: - *value = {static_cast(array[0])}; - break; - case 2: - *value = {static_cast(0), - (static_cast(array[0]) << 32) + array[1]}; - break; - case 3: - *value = {static_cast(array[0]), - (static_cast(array[1]) << 32) + array[2]}; - break; - case 4: - *value = {(static_cast(array[0]) << 32) + array[1], - (static_cast(array[2]) << 32) + array[3]}; - break; - case 5: - if (array[0] != 0) { - return Status::Invalid("Can't build Decimal128 with 5 ints."); - } - *value = {(static_cast(array[1]) << 32) + array[2], - (static_cast(array[3]) << 32) + array[4]}; - break; - default: - return Status::Invalid("Unsupported length for building Decimal128"); - } - - return Status::OK(); -} - -/// \brief Do a division where the divisor fits into a single 32 bit value. -static Status SingleDivide(const uint32_t* dividend, int64_t dividend_length, - uint32_t divisor, Decimal128* remainder, - bool dividend_was_negative, bool divisor_was_negative, - Decimal128* result) { - uint64_t r = 0; - uint32_t result_array[5]; - for (int64_t j = 0; j < dividend_length; j++) { - r <<= 32; - r += dividend[j]; - result_array[j] = static_cast(r / divisor); - r %= divisor; - } - RETURN_NOT_OK(BuildFromArray(result, result_array, dividend_length)); - *remainder = static_cast(r); - FixDivisionSigns(result, remainder, dividend_was_negative, divisor_was_negative); - return Status::OK(); -} - -Status Decimal128::Divide(const Decimal128& divisor, Decimal128* result, - Decimal128* remainder) const { - // Split the dividend and divisor into integer pieces so that we can - // work on them. - uint32_t dividend_array[5]; - uint32_t divisor_array[4]; - bool dividend_was_negative; - bool divisor_was_negative; - // leave an extra zero before the dividend - dividend_array[0] = 0; - int64_t dividend_length = - FillInArray(*this, dividend_array + 1, dividend_was_negative) + 1; - int64_t divisor_length = FillInArray(divisor, divisor_array, divisor_was_negative); - - // Handle some of the easy cases. 
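// ---------------------------------------------------------------------------
// [Editor's note] FixDivisionSigns above encodes truncated (round-toward-zero)
// division: the quotient is negative iff the operand signs differ, and the
// remainder takes the sign of the dividend. Built-in integer '/' and '%' in
// C++11 follow the same rule, so the sign table documented in decimal.h can be
// sanity-checked with plain ints (a sketch, not Arrow code):

#include <cassert>

inline void CheckTruncatedDivisionSigns() {
  assert(21 / 5 == 4 && 21 % 5 == 1);
  assert(-21 / 5 == -4 && -21 % 5 == -1);  // remainder follows the dividend
  assert(21 / -5 == -4 && 21 % -5 == 1);
  assert(-21 / -5 == 4 && -21 % -5 == -1);
}
// ---------------------------------------------------------------------------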
- if (dividend_length <= divisor_length) { - *remainder = *this; - *result = 0; - return Status::OK(); - } - - if (divisor_length == 0) { - return Status::Invalid("Division by 0 in Decimal128"); - } - - if (divisor_length == 1) { - return SingleDivide(dividend_array, dividend_length, divisor_array[0], remainder, - dividend_was_negative, divisor_was_negative, result); - } - - int64_t result_length = dividend_length - divisor_length; - uint32_t result_array[4]; - - // Normalize by shifting both by a multiple of 2 so that - // the digit guessing is better. The requirement is that - // divisor_array[0] is greater than 2**31. - int64_t normalize_bits = BitUtil::CountLeadingZeros(divisor_array[0]); - ShiftArrayLeft(divisor_array, divisor_length, normalize_bits); - ShiftArrayLeft(dividend_array, dividend_length, normalize_bits); - - // compute each digit in the result - for (int64_t j = 0; j < result_length; ++j) { - // Guess the next digit. At worst it is two too large - uint32_t guess = std::numeric_limits::max(); - const auto high_dividend = - static_cast(dividend_array[j]) << 32 | dividend_array[j + 1]; - if (dividend_array[j] != divisor_array[0]) { - guess = static_cast(high_dividend / divisor_array[0]); - } - - // catch all of the cases where guess is two too large and most of the - // cases where it is one too large - auto rhat = static_cast(high_dividend - - guess * static_cast(divisor_array[0])); - while (static_cast(divisor_array[1]) * guess > - (static_cast(rhat) << 32) + dividend_array[j + 2]) { - --guess; - rhat += divisor_array[0]; - if (static_cast(rhat) < divisor_array[0]) { - break; - } - } - - // subtract off the guess * divisor from the dividend - uint64_t mult = 0; - for (int64_t i = divisor_length - 1; i >= 0; --i) { - mult += static_cast(guess) * divisor_array[i]; - uint32_t prev = dividend_array[j + i + 1]; - dividend_array[j + i + 1] -= static_cast(mult); - mult >>= 32; - if (dividend_array[j + i + 1] > prev) { - ++mult; - } - } - uint32_t prev = dividend_array[j]; - dividend_array[j] -= static_cast(mult); - - // if guess was too big, we add back divisor - if (dividend_array[j] > prev) { - --guess; - uint32_t carry = 0; - for (int64_t i = divisor_length - 1; i >= 0; --i) { - const auto sum = - static_cast(divisor_array[i]) + dividend_array[j + i + 1] + carry; - dividend_array[j + i + 1] = static_cast(sum); - carry = static_cast(sum >> 32); - } - dividend_array[j] += carry; - } - - result_array[j] = guess; - } - - // denormalize the remainder - ShiftArrayRight(dividend_array, dividend_length, normalize_bits); - - // return result and remainder - RETURN_NOT_OK(BuildFromArray(result, result_array, result_length)); - RETURN_NOT_OK(BuildFromArray(remainder, dividend_array, dividend_length)); - - FixDivisionSigns(result, remainder, dividend_was_negative, divisor_was_negative); - return Status::OK(); -} - -bool operator==(const Decimal128& left, const Decimal128& right) { - return left.high_bits() == right.high_bits() && left.low_bits() == right.low_bits(); -} - -bool operator!=(const Decimal128& left, const Decimal128& right) { - return !operator==(left, right); -} - -bool operator<(const Decimal128& left, const Decimal128& right) { - return left.high_bits() < right.high_bits() || - (left.high_bits() == right.high_bits() && left.low_bits() < right.low_bits()); -} - -bool operator<=(const Decimal128& left, const Decimal128& right) { - return !operator>(left, right); -} - -bool operator>(const Decimal128& left, const Decimal128& right) { - return operator<(right, left); -} - -bool 
operator>=(const Decimal128& left, const Decimal128& right) { - return !operator<(left, right); -} - -Decimal128 operator-(const Decimal128& operand) { - Decimal128 result(operand.high_bits(), operand.low_bits()); - return result.Negate(); -} - -Decimal128 operator~(const Decimal128& operand) { - Decimal128 result(~operand.high_bits(), ~operand.low_bits()); - return result; -} - -Decimal128 operator+(const Decimal128& left, const Decimal128& right) { - Decimal128 result(left.high_bits(), left.low_bits()); - result += right; - return result; -} - -Decimal128 operator-(const Decimal128& left, const Decimal128& right) { - Decimal128 result(left.high_bits(), left.low_bits()); - result -= right; - return result; -} - -Decimal128 operator*(const Decimal128& left, const Decimal128& right) { - Decimal128 result(left.high_bits(), left.low_bits()); - result *= right; - return result; -} - -Decimal128 operator/(const Decimal128& left, const Decimal128& right) { - Decimal128 remainder; - Decimal128 result; - Status s = left.Divide(right, &result, &remainder); - DCHECK(s.ok()); - return result; -} - -Decimal128 operator%(const Decimal128& left, const Decimal128& right) { - Decimal128 remainder; - Decimal128 result; - Status s = left.Divide(right, &result, &remainder); - DCHECK(s.ok()); - return remainder; -} - -static bool RescaleWouldCauseDataLoss(const Decimal128& value, int32_t delta_scale, - int32_t abs_delta_scale, Decimal128* result) { - Decimal128 multiplier(ScaleMultipliers[abs_delta_scale]); - - if (delta_scale < 0) { - DCHECK_NE(multiplier, 0); - Decimal128 remainder; - Status status = value.Divide(multiplier, result, &remainder); - DCHECK(status.ok()) << status.message(); - return remainder != 0; - } - - *result = value * multiplier; - return (value < 0) ? 
*result > value : *result < value; +Status Decimal128::FromString(const std::string& s, Decimal128* out, int32_t* precision, + int32_t* scale) { + return FromString(util::string_view(s), out, precision, scale); } -Status Decimal128::Rescale(int32_t original_scale, int32_t new_scale, - Decimal128* out) const { - DCHECK_NE(out, nullptr) << "out is nullptr"; - DCHECK_NE(original_scale, new_scale) << "original_scale != new_scale"; - - const int32_t delta_scale = new_scale - original_scale; - const int32_t abs_delta_scale = std::abs(delta_scale); - - DCHECK_GE(abs_delta_scale, 1); - DCHECK_LE(abs_delta_scale, 38); - - Decimal128 result(*this); - const bool rescale_would_cause_data_loss = - RescaleWouldCauseDataLoss(result, delta_scale, abs_delta_scale, out); - - // Fail if we overflow or truncate - if (ARROW_PREDICT_FALSE(rescale_would_cause_data_loss)) { - std::stringstream buf; - buf << "Rescaling decimal value " << ToString(original_scale) - << " from original scale of " << original_scale << " to new scale of " - << new_scale << " would cause data loss"; - return Status::Invalid(buf.str()); - } - - return Status::OK(); +Status Decimal128::FromString(const char* s, Decimal128* out, int32_t* precision, + int32_t* scale) { + return FromString(util::string_view(s), out, precision, scale); } // Helper function used by Decimal128::FromBigEndian -static inline uint64_t FromBigEndian(const uint8_t* bytes, int32_t length) { +static inline uint64_t UInt64FromBigEndian(const uint8_t* bytes, int32_t length) { // We don't bounds check the length here because this is called by // FromBigEndian that has a Decimal128 as its out parameters and // that function is already checking the length of the bytes and only @@ -896,47 +361,77 @@ Status Decimal128::FromBigEndian(const uint8_t* bytes, int32_t length, Decimal12 static constexpr int32_t kMinDecimalBytes = 1; static constexpr int32_t kMaxDecimalBytes = 16; - int64_t high; - uint64_t low; + int64_t high, low; if (length < kMinDecimalBytes || length > kMaxDecimalBytes) { - std::ostringstream stream; - stream << "Length of byte array passed to Decimal128::FromBigEndian "; - stream << "was " << length << ", but must be between "; - stream << kMinDecimalBytes << " and " << kMaxDecimalBytes; - return Status::Invalid(stream.str()); + return Status::Invalid("Length of byte array passed to Decimal128::FromBigEndian ", + "was ", length, ", but must be between ", kMinDecimalBytes, + " and ", kMaxDecimalBytes); } - /// Bytes are coming in big-endian, so the first byte is the MSB and therefore holds the - /// sign bit. + // Bytes are coming in big-endian, so the first byte is the MSB and therefore holds the + // sign bit. const bool is_negative = static_cast(bytes[0]) < 0; - /// Sign extend the low bits if necessary - low = UINT64_MAX * (is_negative && length < 8); - high = -1 * (is_negative && length < kMaxDecimalBytes); - - /// Stop byte of the high bytes + // 1. 
Extract the high bytes + // Stop byte of the high bytes const int32_t high_bits_offset = std::max(0, length - 8); + const auto high_bits = UInt64FromBigEndian(bytes, high_bits_offset); - /// Shift left enough bits to make room for the incoming int64_t - high <<= high_bits_offset * CHAR_BIT; - - /// Preserve the upper bits by inplace OR-ing the int64_t - uint64_t value = arrow::FromBigEndian(bytes, high_bits_offset); - high |= value; + if (high_bits_offset == 8) { + // Avoid undefined shift by 64 below + high = high_bits; + } else { + high = -1 * (is_negative && length < kMaxDecimalBytes); + // Shift left enough bits to make room for the incoming int64_t + high = SafeLeftShift(high, high_bits_offset * CHAR_BIT); + // Preserve the upper bits by inplace OR-ing the int64_t + high |= high_bits; + } - /// Stop byte of the low bytes + // 2. Extract the low bytes + // Stop byte of the low bytes const int32_t low_bits_offset = std::min(length, 8); + const auto low_bits = + UInt64FromBigEndian(bytes + high_bits_offset, length - high_bits_offset); - /// Shift left enough bits to make room for the incoming uint64_t - low <<= low_bits_offset * CHAR_BIT; - - /// Preserve the upper bits by inplace OR-ing the uint64_t - value = arrow::FromBigEndian(bytes + high_bits_offset, length - high_bits_offset); - low |= value; + if (low_bits_offset == 8) { + // Avoid undefined shift by 64 below + low = low_bits; + } else { + // Sign extend the low bits if necessary + low = -1 * (is_negative && length < 8); + // Shift left enough bits to make room for the incoming int64_t + low = SafeLeftShift(low, low_bits_offset * CHAR_BIT); + // Preserve the upper bits by inplace OR-ing the int64_t + low |= low_bits; + } - *out = Decimal128(high, low); + *out = Decimal128(high, static_cast(low)); return Status::OK(); } +Status Decimal128::ToArrowStatus(DecimalStatus dstatus) const { + Status status; + + switch (dstatus) { + case DecimalStatus::kSuccess: + status = Status::OK(); + break; + + case DecimalStatus::kDivideByZero: + status = Status::Invalid("Division by 0 in Decimal128"); + break; + + case DecimalStatus::kOverflow: + status = Status::Invalid("Overflow occurred during Decimal128 operation."); + break; + + case DecimalStatus::kRescaleDataLoss: + status = Status::Invalid("Rescaling decimal value would cause data loss"); + break; + } + return status; +} + } // namespace arrow diff --git a/cpp/src/arrow/util/decimal.h b/cpp/src/arrow/util/decimal.h index 26b82a42f70a7..4c61a1736d04e 100644 --- a/cpp/src/arrow/util/decimal.h +++ b/cpp/src/arrow/util/decimal.h @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -#ifndef ARROW_DECIMAL_H -#define ARROW_DECIMAL_H +#pragma once #include #include @@ -26,9 +25,8 @@ #include #include "arrow/status.h" -#include "arrow/util/macros.h" -#include "arrow/util/type_traits.h" -#include "arrow/util/visibility.h" +#include "arrow/util/basic_decimal.h" +#include "arrow/util/string_view.h" namespace arrow { @@ -39,80 +37,47 @@ namespace arrow { /// Semi-numerical Algorithms section 4.3.1. /// /// Adapted from the Apache ORC C++ implementation -class ARROW_EXPORT Decimal128 { +/// +/// The implementation is split into two parts : +/// +/// 1. BasicDecimal128 +/// - can be safely compiled to IR without references to libstdc++. +/// 2. Decimal128 +/// - has additional functionality on top of BasicDecimal128 to deal with +/// strings and streams. 
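// ---------------------------------------------------------------------------
// [Editor's note] A minimal sketch of the two-layer split described above: a
// freestanding core that reports errors through an enum (and so can be
// compiled to LLVM IR with no libstdc++ references), plus a thin wrapper that
// owns the string-based conveniences. All names here are illustrative, not
// the actual Arrow declarations.

#include <string>

enum class CoreStatus { kSuccess, kDivideByZero };

// Core layer: no strings, no streams, no exceptions.
struct BasicInt {
  long long v = 0;
  CoreStatus Divide(BasicInt divisor, BasicInt* result) const {
    if (divisor.v == 0) return CoreStatus::kDivideByZero;
    result->v = v / divisor.v;
    return CoreStatus::kSuccess;
  }
};

// Wrapper layer: translates the core's error codes into human-readable text,
// mirroring how Decimal128::ToArrowStatus maps DecimalStatus to Status.
struct FancyInt : BasicInt {
  std::string DivideOrExplain(FancyInt divisor) const {
    BasicInt result;
    switch (Divide(divisor, &result)) {
      case CoreStatus::kSuccess:
        return std::to_string(result.v);
      default:
        return "error: division by zero";
    }
  }
};
// ---------------------------------------------------------------------------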
+class ARROW_EXPORT Decimal128 : public BasicDecimal128 {
  public:
-  /// \brief Create a Decimal128 from the two's complement representation.
-  constexpr Decimal128(int64_t high, uint64_t low) noexcept
-      : low_bits_(low), high_bits_(high) {}
-
-  /// \brief Empty constructor creates a Decimal128 with a value of 0.
-  constexpr Decimal128() noexcept : Decimal128(0, 0) {}
+  /// \cond FALSE
+  // (need to avoid a duplicate definition in Sphinx)
+  using BasicDecimal128::BasicDecimal128;
+  /// \endcond

-  /// \brief Convert any integer value into a Decimal128.
-  template <typename T,
-            typename = typename std::enable_if<std::is_integral<T>::value, T>::type>
-  constexpr Decimal128(T value) noexcept
-      : Decimal128(static_cast<int64_t>(value) >= 0 ? 0 : -1,
-                   static_cast<uint64_t>(value)) {}
+  /// \brief Constructor that creates a Decimal128 from a BasicDecimal128.
+  constexpr Decimal128(const BasicDecimal128& value) noexcept : BasicDecimal128(value) {}

   /// \brief Parse the number from a base 10 string representation.
   explicit Decimal128(const std::string& value);

-  /// \brief Create a Decimal128 from an array of bytes. Bytes are assumed to be in
-  /// little-endian byte order.
-  explicit Decimal128(const uint8_t* bytes);
-
-  /// \brief Negate the current value
-  Decimal128& Negate();
-
-  /// \brief Absolute value
-  Decimal128& Abs();
-
-  /// \brief Add a number to this one. The result is truncated to 128 bits.
-  Decimal128& operator+=(const Decimal128& right);
+  /// \brief Empty constructor creates a Decimal128 with a value of 0.
+  // This is required on some older compilers.
+  constexpr Decimal128() noexcept : BasicDecimal128() {}

-  /// \brief Subtract a number from this one. The result is truncated to 128 bits.
-  Decimal128& operator-=(const Decimal128& right);
-
-  /// \brief Multiply this number by another number. The result is truncated to 128 bits.
-  Decimal128& operator*=(const Decimal128& right);
-
-  /// Divide this number by right and return the result. This operation is
-  /// not destructive.
+  /// Divide this number by right and return the result.
+  ///
+  /// This operation is not destructive.
   /// The answer rounds to zero. Signs work like:
   ///   21 /  5 ->  4,  1
   ///  -21 /  5 -> -4, -1
   ///   21 / -5 -> -4,  1
   ///  -21 / -5 ->  4, -1
-  /// \param divisor the number to divide by
-  /// \param remainder the remainder after the division
+  /// \param[in] divisor the number to divide by
+  /// \param[out] result the quotient
+  /// \param[out] remainder the remainder after the division
   Status Divide(const Decimal128& divisor, Decimal128* result,
-                Decimal128* remainder) const;
-
-  /// \brief In-place division.
-  Decimal128& operator/=(const Decimal128& right);
-
-  /// \brief Bitwise or between two Decimal128.
-  Decimal128& operator|=(const Decimal128& right);
-
-  /// \brief Bitwise and between two Decimal128.
-  Decimal128& operator&=(const Decimal128& right);
-
-  /// \brief Shift left by the given number of bits.
-  Decimal128& operator<<=(uint32_t bits);
-
-  /// \brief Shift right by the given number of bits. Negative values will sign-extend.
-  Decimal128& operator>>=(uint32_t bits);
-
-  /// \brief Get the high bits of the two's complement representation of the number.
-  inline int64_t high_bits() const { return high_bits_; }
-
-  /// \brief Get the low bits of the two's complement representation of the number.
-  inline uint64_t low_bits() const { return low_bits_; }
-
-  /// \brief Return the raw bytes of the value in little-endian byte order.
- std::array ToBytes() const; - void ToBytes(uint8_t* out) const; + Decimal128* remainder) const { + auto dstatus = BasicDecimal128::Divide(divisor, result, remainder); + return ToArrowStatus(dstatus); + } /// \brief Convert the Decimal128 value to a base 10 decimal string with the given /// scale. @@ -124,18 +89,25 @@ class ARROW_EXPORT Decimal128 { /// \brief Cast this value to an int64_t. explicit operator int64_t() const; - /// \brief Convert a decimal string to an Decimal128 value, optionally including + /// \brief Convert a decimal string to a Decimal128 value, optionally including /// precision and scale if they're passed in and not null. + static Status FromString(const util::string_view& s, Decimal128* out, + int32_t* precision = NULLPTR, int32_t* scale = NULLPTR); static Status FromString(const std::string& s, Decimal128* out, int32_t* precision = NULLPTR, int32_t* scale = NULLPTR); + static Status FromString(const char* s, Decimal128* out, int32_t* precision = NULLPTR, + int32_t* scale = NULLPTR); - /// \brief Convert from a big endian byte representation. The length must be - /// between 1 and 16 + /// \brief Convert from a big-endian byte representation. The length must be + /// between 1 and 16. /// \return error status if the length is an invalid value static Status FromBigEndian(const uint8_t* data, int32_t length, Decimal128* out); /// \brief Convert Decimal128 from one scale to another - Status Rescale(int32_t original_scale, int32_t new_scale, Decimal128* out) const; + Status Rescale(int32_t original_scale, int32_t new_scale, Decimal128* out) const { + auto dstatus = BasicDecimal128::Rescale(original_scale, new_scale, out); + return ToArrowStatus(dstatus); + } /// \brief Convert to a signed integer template > @@ -144,34 +116,16 @@ class ARROW_EXPORT Decimal128 { constexpr auto max_value = std::numeric_limits::max(); const auto& self = *this; if (self < min_value || self > max_value) { - std::stringstream buf; - buf << "Invalid cast from Decimal128 to " << sizeof(T) << " byte integer"; - return Status::Invalid(buf.str()); + return Status::Invalid("Invalid cast from Decimal128 to ", sizeof(T), + " byte integer"); } - *out = static_cast(low_bits_); + *out = static_cast(low_bits()); return Status::OK(); } private: - uint64_t low_bits_; - int64_t high_bits_; + /// Converts internal error code to Status + Status ToArrowStatus(DecimalStatus dstatus) const; }; -ARROW_EXPORT bool operator==(const Decimal128& left, const Decimal128& right); -ARROW_EXPORT bool operator!=(const Decimal128& left, const Decimal128& right); -ARROW_EXPORT bool operator<(const Decimal128& left, const Decimal128& right); -ARROW_EXPORT bool operator<=(const Decimal128& left, const Decimal128& right); -ARROW_EXPORT bool operator>(const Decimal128& left, const Decimal128& right); -ARROW_EXPORT bool operator>=(const Decimal128& left, const Decimal128& right); - -ARROW_EXPORT Decimal128 operator-(const Decimal128& operand); -ARROW_EXPORT Decimal128 operator~(const Decimal128& operand); -ARROW_EXPORT Decimal128 operator+(const Decimal128& left, const Decimal128& right); -ARROW_EXPORT Decimal128 operator-(const Decimal128& left, const Decimal128& right); -ARROW_EXPORT Decimal128 operator*(const Decimal128& left, const Decimal128& right); -ARROW_EXPORT Decimal128 operator/(const Decimal128& left, const Decimal128& right); -ARROW_EXPORT Decimal128 operator%(const Decimal128& left, const Decimal128& right); - } // namespace arrow - -#endif // ARROW_DECIMAL_H diff --git a/cpp/src/arrow/util/hash-util.h 
b/cpp/src/arrow/util/hash-util.h index fd69cb9438c12..509b7e64035db 100644 --- a/cpp/src/arrow/util/hash-util.h +++ b/cpp/src/arrow/util/hash-util.h @@ -134,10 +134,13 @@ class HashUtil { switch (nbytes) { case 3: h1 = HW_crc32_u8(h1, p[3]); + // fallthrough case 2: h2 = HW_crc32_u8(h2, p[2]); + // fallthrough case 1: h1 = HW_crc32_u8(h1, p[1]); + // fallthrough case 0: break; default: diff --git a/cpp/src/arrow/util/hashing-benchmark.cc b/cpp/src/arrow/util/hashing-benchmark.cc index 7d91f0f536ac1..ee70391815084 100644 --- a/cpp/src/arrow/util/hashing-benchmark.cc +++ b/cpp/src/arrow/util/hashing-benchmark.cc @@ -49,13 +49,13 @@ static std::vector MakeStrings(int32_t n_values, int32_t min_length // Generate strings between 2 and 20 bytes std::uniform_int_distribution length_dist(min_length, max_length); - std::independent_bits_engine bytes_gen(42); + std::independent_bits_engine bytes_gen(42); std::generate(values.begin(), values.end(), [&]() { auto length = length_dist(gen); std::string s(length, 'X'); for (int32_t i = 0; i < length; ++i) { - s[i] = bytes_gen(); + s[i] = static_cast(bytes_gen()); } return s; }); @@ -74,6 +74,7 @@ static void BM_HashIntegers(benchmark::State& state) { // NOLINT non-const refe benchmark::DoNotOptimize(total); } state.SetBytesProcessed(2 * state.iterations() * values.size() * sizeof(int64_t)); + state.SetItemsProcessed(2 * state.iterations() * values.size()); } static void BenchmarkStringHashing(benchmark::State& state, // NOLINT non-const reference @@ -92,6 +93,7 @@ static void BenchmarkStringHashing(benchmark::State& state, // NOLINT non-const benchmark::DoNotOptimize(total); } state.SetBytesProcessed(2 * state.iterations() * total_size); + state.SetItemsProcessed(2 * state.iterations() * values.size()); } static void BM_HashSmallStrings(benchmark::State& state) { // NOLINT non-const reference diff --git a/cpp/src/arrow/util/hashing.h b/cpp/src/arrow/util/hashing.h index ee368fb4e314c..3dde0beeb194e 100644 --- a/cpp/src/arrow/util/hashing.h +++ b/cpp/src/arrow/util/hashing.h @@ -102,6 +102,18 @@ struct ScalarHelper +struct ScalarHelper< + Scalar, AlgNum, + typename std::enable_if::value>::type> + : public ScalarHelperBase { + // ScalarHelper specialization for util::string_view + + static hash_t ComputeHash(const util::string_view& value) { + return ComputeStringHash(value.data(), static_cast(value.size())); + } +}; + template struct ScalarHelper::value>::type> @@ -332,7 +344,7 @@ class ScalarMemoTable { explicit ScalarMemoTable(int64_t entries = 0) : hash_table_(static_cast(entries)) {} - int32_t Get(const Scalar value) const { + int32_t Get(const Scalar& value) const { auto cmp_func = [value](const Payload* payload) -> bool { return ScalarHelper::CompareScalars(payload->value, value); }; @@ -346,7 +358,7 @@ class ScalarMemoTable { } template - int32_t GetOrInsert(const Scalar value, Func1&& on_found, Func2&& on_not_found) { + int32_t GetOrInsert(const Scalar& value, Func1&& on_found, Func2&& on_not_found) { auto cmp_func = [value](const Payload* payload) -> bool { return ScalarHelper::CompareScalars(value, payload->value); }; @@ -364,7 +376,7 @@ class ScalarMemoTable { return memo_index; } - int32_t GetOrInsert(const Scalar value) { + int32_t GetOrInsert(const Scalar& value) { return GetOrInsert(value, [](int32_t i) {}, [](int32_t i) {}); } @@ -389,6 +401,7 @@ class ScalarMemoTable { Scalar value; int32_t memo_index; }; + using HashTableType = HashTableTemplateType; using HashTableEntry = typename HashTableType::Entry; HashTableType hash_table_; @@ 
-621,9 +634,11 @@ class BinaryMemoTable { struct Payload { int32_t memo_index; }; + using HashTableType = HashTable; using HashTableEntry = typename HashTable::Entry; HashTableType hash_table_; + std::vector offsets_; std::string values_; @@ -651,25 +666,6 @@ template struct HashTraits> { using c_type = typename T::c_type; using MemoTableType = SmallScalarMemoTable; - - static Status GetDictionaryArrayData(MemoryPool* pool, - const std::shared_ptr& type, - const MemoTableType& memo_table, - int64_t start_offset, - std::shared_ptr* out) { - std::shared_ptr dict_buffer; - auto dict_length = static_cast(memo_table.size()) - start_offset; - // This makes a copy, but we assume a dictionary array is usually small - // compared to the size of the dictionary-using array. - // (also, copying the dictionary values is cheap compared to the cost - // of building the memo table) - RETURN_NOT_OK( - AllocateBuffer(pool, TypeTraits::bytes_required(dict_length), &dict_buffer)); - memo_table.CopyValues(static_cast(start_offset), - reinterpret_cast(dict_buffer->mutable_data())); - *out = ArrayData::Make(type, dict_length, {nullptr, dict_buffer}, 0 /* null_count */); - return Status::OK(); - } }; template @@ -677,25 +673,6 @@ struct HashTraits< T, typename std::enable_if::value && !is_8bit_int::value>::type> { using c_type = typename T::c_type; using MemoTableType = ScalarMemoTable; - - static Status GetDictionaryArrayData(MemoryPool* pool, - const std::shared_ptr& type, - const MemoTableType& memo_table, - int64_t start_offset, - std::shared_ptr* out) { - std::shared_ptr dict_buffer; - auto dict_length = static_cast(memo_table.size()) - start_offset; - // This makes a copy, but we assume a dictionary array is usually small - // compared to the size of the dictionary-using array. 
- // (also, copying the dictionary values is cheap compared to the cost - // of building the memo table) - RETURN_NOT_OK( - AllocateBuffer(pool, TypeTraits::bytes_required(dict_length), &dict_buffer)); - memo_table.CopyValues(static_cast(start_offset), - reinterpret_cast(dict_buffer->mutable_data())); - *out = ArrayData::Make(type, dict_length, {nullptr, dict_buffer}, 0 /* null_count */); - return Status::OK(); - } }; template diff --git a/cpp/src/arrow/util/int-util-test.cc b/cpp/src/arrow/util/int-util-test.cc index 51fd96e4ea25a..5eba531d874e0 100644 --- a/cpp/src/arrow/util/int-util-test.cc +++ b/cpp/src/arrow/util/int-util-test.cc @@ -17,14 +17,12 @@ #include #include -#include #include #include #include #include -#include "arrow/test-util.h" #include "arrow/util/int-util.h" namespace arrow { @@ -375,5 +373,14 @@ TEST(IntWidth, NullsMany) { } } +TEST(TransposeInts, Int8ToInt64) { + std::vector src = {1, 3, 5, 0, 3, 2}; + std::vector transpose_map = {1111, 2222, 3333, 4444, 5555, 6666, 7777}; + std::vector dest(src.size()); + + TransposeInts(src.data(), dest.data(), 6, transpose_map.data()); + ASSERT_EQ(dest, std::vector({2222, 4444, 6666, 1111, 4444, 3333})); +} + } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/int-util.cc b/cpp/src/arrow/util/int-util.cc index ced1cd1c20da2..d81044b3cafdc 100644 --- a/cpp/src/arrow/util/int-util.cc +++ b/cpp/src/arrow/util/int-util.cc @@ -402,5 +402,45 @@ void DowncastUInts(const uint64_t* source, uint64_t* dest, int64_t length) { memcpy(dest, source, length * sizeof(int64_t)); } +template +void TransposeInts(const InputInt* src, OutputInt* dest, int64_t length, + const int32_t* transpose_map) { + while (length >= 4) { + dest[0] = static_cast(transpose_map[src[0]]); + dest[1] = static_cast(transpose_map[src[1]]); + dest[2] = static_cast(transpose_map[src[2]]); + dest[3] = static_cast(transpose_map[src[3]]); + length -= 4; + src += 4; + dest += 4; + } + while (length > 0) { + *dest++ = static_cast(transpose_map[*src++]); + --length; + } +} + +#define INSTANTIATE(SRC, DEST) \ + template ARROW_EXPORT void TransposeInts( \ + const SRC* source, DEST* dest, int64_t length, const int32_t* transpose_map); + +#define INSTANTIATE_ALL_DEST(DEST) \ + INSTANTIATE(int8_t, DEST) \ + INSTANTIATE(int16_t, DEST) \ + INSTANTIATE(int32_t, DEST) \ + INSTANTIATE(int64_t, DEST) + +#define INSTANTIATE_ALL() \ + INSTANTIATE_ALL_DEST(int8_t) \ + INSTANTIATE_ALL_DEST(int16_t) \ + INSTANTIATE_ALL_DEST(int32_t) \ + INSTANTIATE_ALL_DEST(int64_t) + +INSTANTIATE_ALL() + +#undef INSTANTIATE +#undef INSTANTIATE_ALL +#undef INSTANTIATE_ALL_DEST + } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/int-util.h b/cpp/src/arrow/util/int-util.h index 68355d34549ac..d3ae09f75cfa6 100644 --- a/cpp/src/arrow/util/int-util.h +++ b/cpp/src/arrow/util/int-util.h @@ -19,6 +19,7 @@ #define ARROW_UTIL_INT_UTIL_H #include +#include #include "arrow/util/visibility.h" @@ -63,6 +64,25 @@ void DowncastUInts(const uint64_t* source, uint32_t* dest, int64_t length); ARROW_EXPORT void DowncastUInts(const uint64_t* source, uint64_t* dest, int64_t length); +template +ARROW_EXPORT void TransposeInts(const InputInt* source, OutputInt* dest, int64_t length, + const int32_t* transpose_map); + +/// Signed addition with well-defined behaviour on overflow (as unsigned) +template +SignedInt SafeSignedAdd(SignedInt u, SignedInt v) { + using UnsignedInt = typename std::make_unsigned::type; + return static_cast(static_cast(u) + + static_cast(v)); +} + +/// Signed 
left shift with well-defined behaviour on negative numbers or overflow +template +SignedInt SafeLeftShift(SignedInt u, Shift shift) { + using UnsignedInt = typename std::make_unsigned::type; + return static_cast(static_cast(u) << shift); +} + } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/io-util.cc b/cpp/src/arrow/util/io-util.cc index 8db5db442841b..5d67fe87fa0e5 100644 --- a/cpp/src/arrow/util/io-util.cc +++ b/cpp/src/arrow/util/io-util.cc @@ -113,10 +113,8 @@ static inline Status CheckFileOpResult(int ret, int errno_actual, const PlatformFilename& file_name, const char* opname) { if (ret == -1) { - std::stringstream ss; - ss << "Failed to " << opname << " file: " << file_name.string(); - ss << " , error: " << std::strerror(errno_actual); - return Status::IOError(ss.str()); + return Status::IOError("Failed to ", opname, " file: ", file_name.string(), + " , error: ", std::strerror(errno_actual)); } return Status::OK(); } @@ -146,8 +144,8 @@ Status FileNameFromString(const std::string& file_name, PlatformFilename* out) { Status FileOpenReadable(const PlatformFilename& file_name, int* fd) { int ret, errno_actual; #if defined(_MSC_VER) - errno_actual = _wsopen_s(fd, file_name.wstring().c_str(), _O_RDONLY | _O_BINARY, - _SH_DENYNO, _S_IREAD); + errno_actual = _wsopen_s(fd, file_name.wstring().c_str(), + _O_RDONLY | _O_BINARY | _O_NOINHERIT, _SH_DENYNO, _S_IREAD); ret = *fd; #else ret = *fd = open(file_name.c_str(), O_RDONLY | O_BINARY); @@ -162,7 +160,7 @@ Status FileOpenWritable(const PlatformFilename& file_name, bool write_only, bool int ret, errno_actual; #if defined(_MSC_VER) - int oflag = _O_CREAT | _O_BINARY; + int oflag = _O_CREAT | _O_BINARY | _O_NOINHERIT; int pmode = _S_IWRITE; if (!write_only) { pmode |= _S_IREAD; @@ -232,12 +230,18 @@ Status CreatePipe(int fd[2]) { #endif if (ret == -1) { - return Status::IOError(std::string("Error creating pipe: ") + - std::string(strerror(errno))); + return Status::IOError("Error creating pipe: ", std::strerror(errno)); } return Status::OK(); } +static Status StatusFromErrno(const char* prefix) { +#ifdef _WIN32 + errno = __map_mman_error(GetLastError(), EPERM); +#endif + return Status::IOError(prefix, std::strerror(errno)); +} + // // Compatible way to remap a memory map // @@ -251,18 +255,12 @@ Status MemoryMapRemap(void* addr, size_t old_size, size_t new_size, int fildes, HANDLE fm, h; if (!UnmapViewOfFile(addr)) { - errno = __map_mman_error(GetLastError(), EPERM); - std::stringstream ss; - ss << "UnmapViewOfFile failed: " << std::strerror(errno); - return Status::IOError(ss.str()); + return StatusFromErrno("UnmapViewOfFile failed: "); } h = reinterpret_cast(_get_osfhandle(fildes)); if (h == INVALID_HANDLE_VALUE) { - errno = __map_mman_error(GetLastError(), EPERM); - std::stringstream ss; - ss << "cannot get file handle: " << std::strerror(errno); - return Status::IOError(ss.str()); + return StatusFromErrno("Cannot get file handle: "); } LONG new_size_low = static_cast(new_size & 0xFFFFFFFFL); @@ -272,18 +270,12 @@ Status MemoryMapRemap(void* addr, size_t old_size, size_t new_size, int fildes, SetEndOfFile(h); fm = CreateFileMapping(h, NULL, PAGE_READWRITE, 0, 0, ""); if (fm == NULL) { - errno = __map_mman_error(GetLastError(), EPERM); - std::stringstream ss; - ss << "mremap failed: " << std::strerror(errno); - return Status::IOError(ss.str()); + return StatusFromErrno("CreateFileMapping failed: "); } *new_addr = MapViewOfFile(fm, FILE_MAP_WRITE, 0, 0, new_size); CloseHandle(fm); if (new_addr == NULL) { - errno = 
__map_mman_error(GetLastError(), EPERM); - std::stringstream ss; - ss << "mremap failed: " << std::strerror(errno); - return Status::IOError(ss.str()); + return StatusFromErrno("MapViewOfFile failed: "); } return Status::OK(); #else @@ -291,26 +283,26 @@ Status MemoryMapRemap(void* addr, size_t old_size, size_t new_size, int fildes, // we have to close the mmap first, truncate the file to the new size // and recreate the mmap if (munmap(addr, old_size) == -1) { - std::stringstream ss; - ss << "munmap failed: " << std::strerror(errno); - return Status::IOError(ss.str()); + return StatusFromErrno("munmap failed: "); } if (ftruncate(fildes, new_size) == -1) { - std::stringstream ss; - ss << "cannot truncate file: " << std::strerror(errno); - return Status::IOError(ss.str()); + return StatusFromErrno("ftruncate failed: "); } // we set READ / WRITE flags on the new map, since we could only have // unlarged a RW map in the first place *new_addr = mmap(NULL, new_size, PROT_READ | PROT_WRITE, MAP_SHARED, fildes, 0); + if (*new_addr == MAP_FAILED) { + return StatusFromErrno("mmap failed: "); + } return Status::OK(); #else if (ftruncate(fildes, new_size) == -1) { - std::stringstream ss; - ss << "file truncate failed: " << std::strerror(errno); - return Status::IOError(ss.str()); + return StatusFromErrno("ftruncate failed: "); } *new_addr = mremap(addr, old_size, new_size, MREMAP_MAYMOVE); + if (*new_addr == MAP_FAILED) { + return StatusFromErrno("mremap failed: "); + } return Status::OK(); #endif #endif diff --git a/cpp/src/arrow/util/logging.h b/cpp/src/arrow/util/logging.h index 4cce700db970b..5ea78206a73ee 100644 --- a/cpp/src/arrow/util/logging.h +++ b/cpp/src/arrow/util/logging.h @@ -18,10 +18,29 @@ #ifndef ARROW_UTIL_LOGGING_H #define ARROW_UTIL_LOGGING_H +#ifdef GANDIVA_IR + +// The LLVM IR code doesn't have an NDEBUG mode. And, it shouldn't include references to +// streams or stdc++. So, making the DCHECK calls void in that case. + +#define ARROW_IGNORE_EXPR(expr) ((void)(expr)) + +#define DCHECK(condition) ARROW_IGNORE_EXPR(condition) +#define DCHECK_OK(status) ARROW_IGNORE_EXPR(status) +#define DCHECK_EQ(val1, val2) ARROW_IGNORE_EXPR(val1) +#define DCHECK_NE(val1, val2) ARROW_IGNORE_EXPR(val1) +#define DCHECK_LE(val1, val2) ARROW_IGNORE_EXPR(val1) +#define DCHECK_LT(val1, val2) ARROW_IGNORE_EXPR(val1) +#define DCHECK_GE(val1, val2) ARROW_IGNORE_EXPR(val1) +#define DCHECK_GT(val1, val2) ARROW_IGNORE_EXPR(val1) + +#else // !GANDIVA_IR + #include #include #include +#include "arrow/util/macros.h" #include "arrow/util/visibility.h" namespace arrow { @@ -155,6 +174,8 @@ class ARROW_EXPORT ArrowLog : public ArrowLogBase { static void InstallFailureSignalHandler(); private: + ARROW_DISALLOW_COPY_AND_ASSIGN(ArrowLog); + // Hide the implementation of log provider by void *. // Otherwise, lib user may define the same macro to use the correct header file. void* logging_provider_; @@ -182,5 +203,6 @@ class ARROW_EXPORT Voidify { } // namespace util } // namespace arrow +#endif // GANDIVA_IR #endif // ARROW_UTIL_LOGGING_H diff --git a/cpp/src/arrow/util/machine-benchmark.cc b/cpp/src/arrow/util/machine-benchmark.cc new file mode 100644 index 0000000000000..ad3f413e7f0fd --- /dev/null +++ b/cpp/src/arrow/util/machine-benchmark.cc @@ -0,0 +1,70 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Non-Arrow system benchmarks, provided for convenience. + +#include +#include +#include +#include +#include +#include + +#include "benchmark/benchmark.h" + +namespace arrow { + +// Generate a vector of indices such as following the indices describes +// a path over the whole vector. The path is randomized to avoid triggering +// automatic prefetching in the CPU. +std::vector RandomPath(int32_t size) { + std::default_random_engine gen(42); + std::vector indices(size); + + for (int32_t i = 0; i < size; ++i) { + indices[i] = i; + } + std::shuffle(indices.begin(), indices.end(), gen); + std::vector path(size, -999999); + int32_t prev; + prev = indices[size - 1]; + for (int32_t i = 0; i < size; ++i) { + int32_t next = indices[i]; + path[prev] = next; + prev = next; + } + return path; +} + +// Cache / main memory latency, depending on the working set size +static void BM_memory_latency(benchmark::State& state) { + const auto niters = static_cast(state.range(0)); + const std::vector path = RandomPath(niters / 4); + + int32_t total = 0; + int32_t index = 0; + for (auto _ : state) { + total += index; + index = path[index]; + } + benchmark::DoNotOptimize(total); + state.SetItemsProcessed(state.iterations()); +} + +BENCHMARK(BM_memory_latency)->RangeMultiplier(2)->Range(2 << 10, 2 << 24); + +} // namespace arrow diff --git a/cpp/src/arrow/util/macros.h b/cpp/src/arrow/util/macros.h index 1d188820837fc..5f1934d732ca7 100644 --- a/cpp/src/arrow/util/macros.h +++ b/cpp/src/arrow/util/macros.h @@ -18,6 +18,9 @@ #ifndef ARROW_UTIL_MACROS_H #define ARROW_UTIL_MACROS_H +#define ARROW_STRINGIFY(x) #x +#define ARROW_CONCAT(x, y) x##y + // From Google gutil #ifndef ARROW_DISALLOW_COPY_AND_ASSIGN #define ARROW_DISALLOW_COPY_AND_ASSIGN(TypeName) \ @@ -111,6 +114,15 @@ #endif #endif // !defined(MANUALLY_ALIGNED_STRUCT) +// ---------------------------------------------------------------------- +// Convenience macro disabling a particular UBSan check in a function + +#if defined(__clang__) +#define ARROW_DISABLE_UBSAN(feature) __attribute__((no_sanitize(feature))) +#else +#define ARROW_DISABLE_UBSAN(feature) +#endif + // ---------------------------------------------------------------------- // From googletest // (also in parquet-cpp) diff --git a/cpp/src/arrow/util/number-parsing-benchmark.cc b/cpp/src/arrow/util/number-parsing-benchmark.cc index 28ef76abe7281..42c7b31ae6757 100644 --- a/cpp/src/arrow/util/number-parsing-benchmark.cc +++ b/cpp/src/arrow/util/number-parsing-benchmark.cc @@ -43,7 +43,7 @@ static std::vector MakeIntStrings(int32_t num_items) { for (int32_t i = 0; i < num_items; ++i) { strings.push_back(base_strings[i % base_strings.size()]); } - return base_strings; + return strings; } static std::vector MakeFloatStrings(int32_t num_items) { @@ -54,7 +54,18 @@ static std::vector MakeFloatStrings(int32_t num_items) { for (int32_t i = 0; i < num_items; ++i) { strings.push_back(base_strings[i % base_strings.size()]); } 
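// ---------------------------------------------------------------------------
// [Editor's note] BM_memory_latency (machine-benchmark.cc above) measures
// latency rather than bandwidth by chasing a randomized cyclic permutation:
// each load's address depends on the previous load's value, so the CPU can
// neither prefetch nor overlap the accesses. A self-contained sketch of the
// same pointer-chasing technique (illustrative, not the Arrow benchmark):

#include <algorithm>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <numeric>
#include <random>
#include <vector>

int main() {
  const int32_t n = 1 << 22;  // ~16 MiB of int32_t, well past typical L2 sizes
  std::vector<int32_t> order(n);
  std::iota(order.begin(), order.end(), 0);
  std::shuffle(order.begin(), order.end(), std::default_random_engine(42));

  // Link the shuffled nodes into one cycle: path[a] is the node after a.
  std::vector<int32_t> path(n);
  for (int32_t i = 0; i < n; ++i) {
    path[order[i]] = order[(i + 1) % n];
  }

  int32_t cur = 0;
  const auto start = std::chrono::steady_clock::now();
  for (int32_t i = 0; i < n; ++i) {
    cur = path[cur];  // serialized, dependent loads
  }
  const auto elapsed = std::chrono::steady_clock::now() - start;

  // Print (and thereby use) 'cur' so the chase cannot be optimized away.
  const auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed);
  std::printf("%.1f ns per load (end node %d)\n",
              static_cast<double>(ns.count()) / n, cur);
  return 0;
}
// ---------------------------------------------------------------------------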
-  return base_strings;
+  return strings;
+}
+
+static std::vector<std::string> MakeTimestampStrings(int32_t num_items) {
+  std::vector<std::string> base_strings = {"2018-11-13 17:11:10", "2018-11-13 11:22:33",
+                                           "2016-02-29 11:22:33"};
+
+  std::vector<std::string> strings;
+  for (int32_t i = 0; i < num_items; ++i) {
+    strings.push_back(base_strings[i % base_strings.size()]);
+  }
+  return strings;
 }

 template <class ARROW_TYPE>
@@ -97,6 +108,29 @@ static void BM_FloatParsing(benchmark::State& state) {  // NOLINT non-const refe
   state.SetItemsProcessed(state.iterations() * strings.size());
 }

+template <TimeUnit::type UNIT>
+static void BM_TimestampParsing(benchmark::State& state) {  // NOLINT non-const reference
+  using c_type = TimestampType::c_type;
+
+  auto strings = MakeTimestampStrings(1000);
+  auto type = timestamp(UNIT);
+  StringConverter<TimestampType> converter(type);
+
+  while (state.KeepRunning()) {
+    c_type total = 0;
+    for (const auto& s : strings) {
+      c_type value;
+      if (!converter(s.data(), s.length(), &value)) {
+        std::cerr << "Conversion failed for '" << s << "'";
+        std::abort();
+      }
+      total += value;
+    }
+    benchmark::DoNotOptimize(total);
+  }
+  state.SetItemsProcessed(state.iterations() * strings.size());
+}
+
 BENCHMARK_TEMPLATE(BM_IntegerParsing, Int8Type);
 BENCHMARK_TEMPLATE(BM_IntegerParsing, Int16Type);
 BENCHMARK_TEMPLATE(BM_IntegerParsing, Int32Type);
@@ -109,5 +143,10 @@ BENCHMARK_TEMPLATE(BM_IntegerParsing, UInt64Type);
 BENCHMARK_TEMPLATE(BM_FloatParsing, FloatType);
 BENCHMARK_TEMPLATE(BM_FloatParsing, DoubleType);

+BENCHMARK_TEMPLATE(BM_TimestampParsing, TimeUnit::SECOND);
+BENCHMARK_TEMPLATE(BM_TimestampParsing, TimeUnit::MILLI);
+BENCHMARK_TEMPLATE(BM_TimestampParsing, TimeUnit::MICRO);
+BENCHMARK_TEMPLATE(BM_TimestampParsing, TimeUnit::NANO);
+
 }  // namespace internal
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/parsing.h b/cpp/src/arrow/util/parsing.h
index aa1f820257e79..fc6ca0404785c 100644
--- a/cpp/src/arrow/util/parsing.h
+++ b/cpp/src/arrow/util/parsing.h
@@ -34,7 +34,7 @@
 #include "arrow/type.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/checked_cast.h"
-#include "arrow/util/date.h"
+#include "arrow/vendored/datetime.h"

 namespace arrow {
 namespace internal {
@@ -335,7 +335,10 @@ class StringToSignedIntConverterMixin {
       if (ARROW_PREDICT_FALSE(unsigned_value > max_negative)) {
         return false;
       }
-      *out = static_cast<value_type>(-static_cast<value_type>(unsigned_value));
+      // To avoid both compiler warnings (with unsigned negation)
+      // and undefined behaviour (with signed negation overflow),
+      // use the expanded formula for 2's complement negation.
+      *out = static_cast<value_type>(~unsigned_value + 1);
     } else {
       if (ARROW_PREDICT_FALSE(unsigned_value > max_positive)) {
         return false;
@@ -372,7 +375,7 @@ class StringConverter<TimestampType> {
     // - "YYYY-MM-DD[ T]hh:mm:ss"
     // - "YYYY-MM-DD[ T]hh:mm:ssZ"
     // UTC is always assumed, and the DataType's timezone is ignored.
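// ---------------------------------------------------------------------------
// [Editor's note] On the '~unsigned_value + 1' change just above: for an
// unsigned u, ~u + 1 is the two's complement negation, computed entirely in
// unsigned arithmetic where wraparound is well defined. Negating a signed
// value can overflow (undefined behaviour), and negating an unsigned value
// draws compiler warnings; the expanded form avoids both. A quick check
// (a sketch; relies on the conventional two's complement conversion, as the
// patch itself does):

#include <cassert>
#include <cstdint>

inline void CheckTwosComplementNegation() {
  const uint32_t u = 12345;
  assert(static_cast<int32_t>(~u + 1) == -12345);

  // Works at the boundary, where plain signed negation would overflow:
  const uint32_t big = 2147483648u;  // |INT32_MIN|
  assert(static_cast<int32_t>(~big + 1) == INT32_MIN);
}
// ---------------------------------------------------------------------------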
- date::year_month_day ymd; + arrow::util::date::year_month_day ymd; if (ARROW_PREDICT_FALSE(length < 10)) { return false; } @@ -380,7 +383,7 @@ class StringConverter { if (ARROW_PREDICT_FALSE(!ParseYYYY_MM_DD(s, &ymd))) { return false; } - return ConvertTimePoint(date::sys_days(ymd), out); + return ConvertTimePoint(arrow::util::date::sys_days(ymd), out); } if (ARROW_PREDICT_FALSE(s[10] != ' ') && ARROW_PREDICT_FALSE(s[10] != 'T')) { return false; @@ -396,7 +399,7 @@ class StringConverter { if (ARROW_PREDICT_FALSE(!ParseHH_MM_SS(s + 11, &seconds))) { return false; } - return ConvertTimePoint(date::sys_days(ymd) + seconds, out); + return ConvertTimePoint(arrow::util::date::sys_days(ymd) + seconds, out); } return false; } @@ -419,12 +422,13 @@ class StringConverter { *out = std::chrono::duration_cast(duration).count(); return true; } - // Unreachable + // Unreachable, but suppress compiler warning assert(0); + *out = 0; return true; } - bool ParseYYYY_MM_DD(const char* s, date::year_month_day* out) { + bool ParseYYYY_MM_DD(const char* s, arrow::util::date::year_month_day* out) { uint16_t year; uint8_t month, day; if (ARROW_PREDICT_FALSE(s[4] != '-') || ARROW_PREDICT_FALSE(s[7] != '-')) { @@ -439,7 +443,8 @@ class StringConverter { if (ARROW_PREDICT_FALSE(!detail::ParseUnsigned(s + 8, 2, &day))) { return false; } - *out = {date::year{year}, date::month{month}, date::day{day}}; + *out = {arrow::util::date::year{year}, arrow::util::date::month{month}, + arrow::util::date::day{day}}; return out->ok(); } diff --git a/cpp/src/arrow/util/rle-encoding-test.cc b/cpp/src/arrow/util/rle-encoding-test.cc index 88382618653e9..aac1b1523990c 100644 --- a/cpp/src/arrow/util/rle-encoding-test.cc +++ b/cpp/src/arrow/util/rle-encoding-test.cc @@ -193,7 +193,7 @@ void ValidateRle(const vector& values, int bit_width, uint8_t* expected_enc EXPECT_EQ(encoded_len, expected_len); } if (expected_encoding != NULL) { - EXPECT_EQ(memcmp(buffer, expected_encoding, expected_len), 0); + EXPECT_EQ(memcmp(buffer, expected_encoding, encoded_len), 0); } // Verify read diff --git a/cpp/src/arrow/util/rle-encoding.h b/cpp/src/arrow/util/rle-encoding.h index a97543d5be799..acefc8e3f7583 100644 --- a/cpp/src/arrow/util/rle-encoding.h +++ b/cpp/src/arrow/util/rle-encoding.h @@ -436,6 +436,7 @@ bool RleDecoder::NextCounts() { literal_count_ = (indicator_value >> 1) * 8; } else { repeat_count_ = indicator_value >> 1; + // XXX (ARROW-4018) this is not big-endian compatible bool result = bit_reader_.GetAligned(static_cast(BitUtil::CeilDiv(bit_width_, 8)), reinterpret_cast(¤t_value_)); diff --git a/cpp/src/arrow/util/string_builder.h b/cpp/src/arrow/util/string_builder.h new file mode 100644 index 0000000000000..7b3e10742a9a9 --- /dev/null +++ b/cpp/src/arrow/util/string_builder.h @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_UTIL_STRING_BUILDER_H
+#define ARROW_UTIL_STRING_BUILDER_H
+
+#include <sstream>
+#include <string>
+#include <utility>
+
+namespace arrow {
+namespace util {
+
+template <typename Head>
+void StringBuilderRecursive(std::stringstream& stream, Head&& head) {
+  stream << head;
+}
+
+template <typename Head, typename... Tail>
+void StringBuilderRecursive(std::stringstream& stream, Head&& head, Tail&&... tail) {
+  StringBuilderRecursive(stream, std::forward<Head>(head));
+  StringBuilderRecursive(stream, std::forward<Tail>(tail)...);
+}
+
+template <typename... Args>
+std::string StringBuilder(Args&&... args) {
+  std::stringstream stream;
+
+  StringBuilderRecursive(stream, std::forward<Args>(args)...);
+
+  return stream.str();
+}
+
+}  // namespace util
+}  // namespace arrow
+
+#endif  // ARROW_UTIL_STRING_BUILDER_H
diff --git a/cpp/src/arrow/util/string_view.h b/cpp/src/arrow/util/string_view.h
index 2ee594a9e9ad3..a1a813726e4f0 100644
--- a/cpp/src/arrow/util/string_view.h
+++ b/cpp/src/arrow/util/string_view.h
@@ -18,7 +18,7 @@
 #ifndef ARROW_UTIL_STRING_VIEW_H
 #define ARROW_UTIL_STRING_VIEW_H
 
-#include "arrow/util/string_view/string_view.hpp"
+#include "arrow/vendored/string_view.hpp"  // IWYU pragma: export
 
 namespace arrow {
 namespace util {
diff --git a/cpp/src/arrow/util/task-group.cc b/cpp/src/arrow/util/task-group.cc
index 3ea63fc5ad80e..52c40bd46d1d3 100644
--- a/cpp/src/arrow/util/task-group.cc
+++ b/cpp/src/arrow/util/task-group.cc
@@ -17,9 +17,11 @@
 
 #include "arrow/util/task-group.h"
 
+#include <atomic>
 #include <condition_variable>
 #include <cstdint>
 #include <mutex>
+#include <utility>
 
 #include "arrow/util/logging.h"
 #include "arrow/util/thread-pool.h"
@@ -41,6 +43,8 @@ class SerialTaskGroup : public TaskGroup {
 
   Status current_status() override { return status_; }
 
+  bool ok() override { return status_.ok(); }
+
   Status Finish() override {
     if (!finished_) {
       finished_ = true;
@@ -70,7 +74,8 @@ class SerialTaskGroup : public TaskGroup {
 
 class ThreadedTaskGroup : public TaskGroup {
  public:
-  explicit ThreadedTaskGroup(ThreadPool* thread_pool) : thread_pool_(thread_pool) {}
+  explicit ThreadedTaskGroup(ThreadPool* thread_pool)
+      : thread_pool_(thread_pool), nremaining_(0), ok_(true) {}
 
   ~ThreadedTaskGroup() override {
     // Make sure all pending tasks are finished, so that dangling references
@@ -79,22 +84,19 @@ class ThreadedTaskGroup : public TaskGroup {
   }
 
   void AppendReal(std::function<Status()> task) override {
-    std::lock_guard<std::mutex> lock(mutex_);
-    DCHECK(!finished_);
-
-    if (status_.ok()) {
-      ++nremaining_;
-      status_ = thread_pool_->Spawn([&, task]() {
-        std::unique_lock<std::mutex> lock(mutex_);
-        if (status_.ok()) {
-          lock.unlock();
+    // The hot path is unlocked thanks to atomics
+    // Only if an error occurs is the lock taken
+    if (ok_.load(std::memory_order_acquire)) {
+      nremaining_.fetch_add(1, std::memory_order_acquire);
+      Status st = thread_pool_->Spawn([this, task]() {
+        if (ok_.load(std::memory_order_acquire)) {
+          // XXX what about exceptions?
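+          // (Reviewer annotation, not in the original patch: ok_ is checked
+          //  twice -- once in AppendReal before spawning and once here inside
+          //  the closure -- so that after any task has failed, later task
+          //  bodies are skipped while OneTaskDone() still runs, letting
+          //  Finish() wake up with the correct remaining-task count.)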
           Status st = task();
-          lock.lock();
-          status_ &= st;
+          UpdateStatus(std::move(st));
         }
         OneTaskDone();
       });
+      UpdateStatus(std::move(st));
     }
   }
@@ -103,15 +105,15 @@ class ThreadedTaskGroup : public TaskGroup {
     return status_;
   }
 
+  bool ok() override { return ok_.load(); }
+
   Status Finish() override {
     std::unique_lock<std::mutex> lock(mutex_);
     if (!finished_) {
-      cv_.wait(lock, [&]() { return nremaining_ == 0; });
+      cv_.wait(lock, [&]() { return nremaining_.load() == 0; });
       // Current tasks may start other tasks, so only set this when done
       finished_ = true;
       if (parent_) {
-        // Need to lock parent
-        std::lock_guard<std::mutex> parent_lock(parent_->mutex_);
         parent_->OneTaskDone();
       }
     }
@@ -124,26 +126,42 @@ class ThreadedTaskGroup : public TaskGroup {
     std::lock_guard<std::mutex> lock(mutex_);
     auto child = new ThreadedTaskGroup(thread_pool_);
     child->parent_ = this;
-    nremaining_++;
+    nremaining_.fetch_add(1, std::memory_order_acquire);
     return std::shared_ptr<TaskGroup>(child);
   }
 
  protected:
+  void UpdateStatus(Status&& st) {
+    // Must be called unlocked, only locks on error
+    if (ARROW_PREDICT_FALSE(!st.ok())) {
+      std::lock_guard<std::mutex> lock(mutex_);
+      ok_.store(false, std::memory_order_release);
+      status_ &= std::move(st);
+    }
+  }
+
   void OneTaskDone() {
-    // We are locked
-    --nremaining_;
-    DCHECK_GE(nremaining_, 0);
-    if (nremaining_ == 0) {
+    // Can be called unlocked thanks to atomics
+    auto nremaining = nremaining_.fetch_sub(1, std::memory_order_release) - 1;
+    DCHECK_GE(nremaining, 0);
+    if (nremaining == 0) {
+      // Take the lock so that ~ThreadedTaskGroup cannot destroy cv
+      // before cv.notify_one() has returned
+      std::unique_lock<std::mutex> lock(mutex_);
       cv_.notify_one();
     }
   }
 
+  // These members are usable unlocked
   ThreadPool* thread_pool_;
+  std::atomic<int32_t> nremaining_;
+  std::atomic<bool> ok_;
+
+  // These members use locking
   std::mutex mutex_;
   std::condition_variable cv_;
   Status status_;
   bool finished_ = false;
-  int32_t nremaining_ = 0;
   ThreadedTaskGroup* parent_ = nullptr;
 };
diff --git a/cpp/src/arrow/util/task-group.h b/cpp/src/arrow/util/task-group.h
index 450b6da5884fc..390d9476e59bd 100644
--- a/cpp/src/arrow/util/task-group.h
+++ b/cpp/src/arrow/util/task-group.h
@@ -59,7 +59,7 @@ class ARROW_EXPORT TaskGroup {
   virtual Status current_status() = 0;
 
   /// Whether some tasks have already failed.  Non-blocking, useful for stopping early.
-  bool ok() { return current_status().ok(); }
+  virtual bool ok() = 0;
 
   /// How many tasks can typically be executed in parallel.
   /// This is only a hint, useful for testing or debugging.
diff --git a/cpp/src/arrow/util/thread-pool-benchmark.cc b/cpp/src/arrow/util/thread-pool-benchmark.cc
new file mode 100644
index 0000000000000..8d855d3acba09
--- /dev/null
+++ b/cpp/src/arrow/util/thread-pool-benchmark.cc
@@ -0,0 +1,202 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
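// Reviewer sketch, not part of this patch: how the TaskGroup API measured
// below is meant to be used, including the new non-blocking ok() for early
// exit.  `items` and `Process` are hypothetical names for illustration only.
//
//   auto group = arrow::internal::TaskGroup::MakeThreaded(pool.get());
//   for (const auto& item : items) {
//     if (!group->ok()) break;                          // fail-fast, non-blocking
//     group->Append([item] { return Process(item); });  // each task returns Status
//   }
//   arrow::Status st = group->Finish();                 // waits, keeps first error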
+
+#include "benchmark/benchmark.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <functional>
+#include <iostream>
+#include <limits>
+#include <memory>
+#include <random>
+#include <string>
+#include <vector>
+
+#include "arrow/status.h"
+#include "arrow/test-util.h"
+#include "arrow/util/task-group.h"
+#include "arrow/util/thread-pool.h"
+
+namespace arrow {
+namespace internal {
+
+struct Workload {
+  explicit Workload(int32_t size) : size_(size), data_(kDataSize) {
+    std::default_random_engine gen(42);
+    std::uniform_int_distribution<uint64_t> dist(0, std::numeric_limits<uint64_t>::max());
+    std::generate(data_.begin(), data_.end(), [&]() { return dist(gen); });
+  }
+
+  void operator()();
+
+ private:
+  static constexpr int32_t kDataSize = 32;
+
+  int32_t size_;
+  std::vector<uint64_t> data_;
+};
+
+void Workload::operator()() {
+  uint64_t result = 0;
+  for (int32_t i = 0; i < size_ / kDataSize; ++i) {
+    for (const auto v : data_) {
+      result = (result << (v % 64)) - v;
+    }
+  }
+  benchmark::DoNotOptimize(result);
+}
+
+struct Task {
+  explicit Task(int32_t size) : workload_(size) {}
+
+  Status operator()() {
+    workload_();
+    return Status::OK();
+  }
+
+ private:
+  Workload workload_;
+};
+
+// This benchmark simply provides a baseline indicating the raw cost of our workload
+// depending on the workload size.  Number of items / second in this (serial)
+// benchmark can be compared to the numbers obtained in BM_ThreadPoolSpawn.
+static void BM_WorkloadCost(benchmark::State& state) {
+  const auto workload_size = static_cast<int32_t>(state.range(0));
+
+  Workload workload(workload_size);
+  for (auto _ : state) {
+    workload();
+  }
+
+  state.SetItemsProcessed(state.iterations());
+}
+
+// Benchmark ThreadPool::Spawn
+static void BM_ThreadPoolSpawn(benchmark::State& state) {
+  const auto nthreads = static_cast<int>(state.range(0));
+  const auto workload_size = static_cast<int32_t>(state.range(1));
+
+  Workload workload(workload_size);
+
+  // Spawn enough tasks to make the pool start up overhead negligible
+  const int32_t nspawns = 200000000 / workload_size + 1;
+
+  for (auto _ : state) {
+    state.PauseTiming();
+    std::shared_ptr<ThreadPool> pool;
+    ABORT_NOT_OK(ThreadPool::Make(nthreads, &pool));
+    state.ResumeTiming();
+
+    for (int32_t i = 0; i < nspawns; ++i) {
+      // Pass the task by reference to avoid copying it around
+      ABORT_NOT_OK(pool->Spawn(std::ref(workload)));
+    }
+
+    // Wait for all tasks to finish
+    ABORT_NOT_OK(pool->Shutdown(true /* wait */));
+    state.PauseTiming();
+    pool.reset();
+    state.ResumeTiming();
+  }
+  state.SetItemsProcessed(state.iterations() * nspawns);
+}
+
+// Benchmark serial TaskGroup
+static void BM_SerialTaskGroup(benchmark::State& state) {
+  const auto workload_size = static_cast<int32_t>(state.range(0));
+
+  Task task(workload_size);
+
+  const int32_t nspawns = 10000000 / workload_size + 1;
+
+  for (auto _ : state) {
+    auto task_group = TaskGroup::MakeSerial();
+    for (int32_t i = 0; i < nspawns; ++i) {
+      // Pass the task by reference to avoid copying it around
+      task_group->Append(std::ref(task));
+    }
+    ABORT_NOT_OK(task_group->Finish());
+  }
+  state.SetItemsProcessed(state.iterations() * nspawns);
+}
+
+// Benchmark threaded TaskGroup
+static void BM_ThreadedTaskGroup(benchmark::State& state) {
+  const auto nthreads = static_cast<int>(state.range(0));
+  const auto workload_size = static_cast<int32_t>(state.range(1));
+
+  std::shared_ptr<ThreadPool> pool;
+  ABORT_NOT_OK(ThreadPool::Make(nthreads, &pool));
+
+  Task task(workload_size);
+
+  const int32_t nspawns = 10000000 / workload_size + 1;
+
+  for (auto _ : state) {
+    auto task_group = TaskGroup::MakeThreaded(pool.get());
+    for (int32_t i = 0; i < nspawns; ++i) {
+      // Pass the task by reference to avoid copying it around
+      task_group->Append(std::ref(task));
+    }
+    ABORT_NOT_OK(task_group->Finish());
+  }
+  ABORT_NOT_OK(pool->Shutdown(true /* wait */));
+
+  state.SetItemsProcessed(state.iterations() * nspawns);
+}
+
+static const int32_t kWorkloadSizes[] = {1000, 10000, 100000};
+
+static void WorkloadCost_Customize(benchmark::internal::Benchmark* b) {
+  for (const auto w : kWorkloadSizes) {
+    b->Args({w});
+  }
+  b->ArgNames({"task_cost"});
+}
+
+static void ThreadPoolSpawn_Customize(benchmark::internal::Benchmark* b) {
+  for (const int32_t w : kWorkloadSizes) {
+    for (const int nthreads : {1, 2, 4, 8}) {
+      b->Args({nthreads, w});
+    }
+  }
+  b->ArgNames({"threads", "task_cost"});
+}
+
+static const int kRepetitions = 1;
+
+BENCHMARK(BM_WorkloadCost)->Repetitions(kRepetitions)->Apply(WorkloadCost_Customize);
+
+BENCHMARK(BM_ThreadPoolSpawn)
+    ->UseRealTime()
+    ->Repetitions(kRepetitions)
+    ->Apply(ThreadPoolSpawn_Customize);
+
+BENCHMARK(BM_SerialTaskGroup)
+    ->UseRealTime()
+    ->Repetitions(kRepetitions)
+    ->Apply(WorkloadCost_Customize);
+
+BENCHMARK(BM_ThreadedTaskGroup)
+    ->UseRealTime()
+    ->Repetitions(kRepetitions)
+    ->Apply(ThreadPoolSpawn_Customize);
+
+}  // namespace internal
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/thread-pool-test.cc b/cpp/src/arrow/util/thread-pool-test.cc
index 6d7b9e230f080..c0deb20ccdde1 100644
--- a/cpp/src/arrow/util/thread-pool-test.cc
+++ b/cpp/src/arrow/util/thread-pool-test.cc
@@ -298,7 +298,8 @@ TEST_F(TestThreadPool, Submit) {
 
 // Test fork safety on Unix
 
-#if !(defined(_WIN32) || defined(ARROW_VALGRIND))
+#if !(defined(_WIN32) || defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER) || \
+      defined(THREAD_SANITIZER))
 TEST_F(TestThreadPool, ForkSafety) {
   pid_t child_pid;
   int child_status;
diff --git a/cpp/src/arrow/util/thread-pool.cc b/cpp/src/arrow/util/thread-pool.cc
index 751b264b42f59..17ad9c4972fa2 100644
--- a/cpp/src/arrow/util/thread-pool.cc
+++ b/cpp/src/arrow/util/thread-pool.cc
@@ -34,6 +34,9 @@ namespace internal {
 struct ThreadPool::State {
   State() : desired_capacity_(0), please_shutdown_(false), quick_shutdown_(false) {}
 
+  // NOTE: in case locking becomes too expensive, we can investigate lock-free FIFOs
+  // such as https://github.com/cameron314/concurrentqueue
+
   std::mutex mutex_;
   std::condition_variable cv_;
   std::condition_variable cv_shutdown_;
diff --git a/cpp/src/arrow/util/trie-benchmark.cc b/cpp/src/arrow/util/trie-benchmark.cc
new file mode 100644
index 0000000000000..acc2892689ff4
--- /dev/null
+++ b/cpp/src/arrow/util/trie-benchmark.cc
@@ -0,0 +1,221 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
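// Reviewer sketch, not part of this patch: the two benchmark variants below
// answer the same membership question ("is this string a CSV null?"); with
// the Trie API it reads
//
//   Trie trie = MakeNullsTrie();
//   bool is_null = trie.Find("NaN") >= 0;  // Find() returns entry index or -1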
+ +#include "benchmark/benchmark.h" + +#include +#include +#include + +#include "arrow/status.h" +#include "arrow/test-util.h" +#include "arrow/util/trie.h" + +namespace arrow { +namespace internal { + +static inline bool InlinedNullLookup(util::string_view s) { + // An inlined version of trie lookup for a specific set of strings + // (see AllNulls()) + auto size = s.length(); + auto data = s.data(); + if (size == 0) { + return false; + } + if (size == 1) { + return false; + } + + auto chars = reinterpret_cast(data); + auto first = chars[0]; + auto second = chars[1]; + switch (first) { + case 'N': { + // "NA", "N/A", "NaN", "NULL" + if (size == 2) { + return second == 'A'; + } + auto third = chars[2]; + if (size == 3) { + return (second == '/' && third == 'A') || (second == 'a' && third == 'N'); + } + if (size == 4) { + return (second == 'U' && third == 'L' && chars[3] == 'L'); + } + return false; + } + case 'n': { + // "n/a", "nan", "null" + if (size == 2) { + return false; + } + auto third = chars[2]; + if (size == 3) { + return (second == '/' && third == 'a') || (second == 'a' && third == 'n'); + } + if (size == 4) { + return (second == 'u' && third == 'l' && chars[3] == 'l'); + } + return false; + } + case '1': { + // '1.#IND', '1.#QNAN' + if (size == 6) { + // '#' is the most unlikely char here, check it first + return (chars[2] == '#' && chars[1] == '.' && chars[3] == 'I' && + chars[4] == 'N' && chars[5] == 'D'); + } + if (size == 7) { + return (chars[2] == '#' && chars[1] == '.' && chars[3] == 'Q' && + chars[4] == 'N' && chars[5] == 'A' && chars[6] == 'N'); + } + return false; + } + case '-': { + switch (second) { + case 'N': + // "-NaN" + return (size == 4 && chars[2] == 'a' && chars[3] == 'N'); + case 'n': + // "-nan" + return (size == 4 && chars[2] == 'a' && chars[3] == 'n'); + case '1': + // "-1.#IND", "-1.#QNAN" + if (size == 7) { + return (chars[3] == '#' && chars[2] == '.' && chars[4] == 'I' && + chars[5] == 'N' && chars[6] == 'D'); + } + if (size == 8) { + return (chars[3] == '#' && chars[2] == '.' 
&& chars[4] == 'Q' && + chars[5] == 'N' && chars[6] == 'A' && chars[7] == 'N'); + } + return false; + default: + return false; + } + } + case '#': { + // "#N/A", "#N/A N/A", "#NA" + if (size < 3 || chars[1] != 'N') { + return false; + } + auto third = chars[2]; + if (size == 3) { + return third == 'A'; + } + if (size == 4) { + return third == '/' && chars[3] == 'A'; + } + if (size == 8) { + return std::memcmp(data + 2, "/A N/A", 5) == 0; + } + return false; + } + default: + return false; + } +} + +std::vector AllNulls() { + return {"#N/A", "#N/A N/A", "#NA", "-1.#IND", "-1.#QNAN", "-NaN", "-nan", "1.#IND", + "1.#QNAN", "N/A", "NA", "NULL", "NaN", "n/a", "nan", "null"}; +} + +Trie MakeNullsTrie() { + auto nulls = AllNulls(); + + TrieBuilder builder; + for (const auto& str : AllNulls()) { + ABORT_NOT_OK(builder.Append(str)); + } + return builder.Finish(); +} + +std::vector Expand(const std::vector& base, size_t n) { + std::vector result; + result.reserve(n); + + while (true) { + for (const auto& v : base) { + result.push_back(v); + if (result.size() == n) { + return result; + } + } + } +} + +static void BenchmarkTrieLookups(benchmark::State& state, // NOLINT non-const reference + const std::vector& strings) { + Trie trie = MakeNullsTrie(); + int32_t total = 0; + + auto lookups = Expand(strings, 100); + + for (auto _ : state) { + for (const auto& s : lookups) { + total += trie.Find(s); + } + } + benchmark::DoNotOptimize(total); + state.SetItemsProcessed(state.iterations() * lookups.size()); +} + +static void BenchmarkInlinedTrieLookups( + benchmark::State& state, // NOLINT non-const reference + const std::vector& strings) { + int32_t total = 0; + + auto lookups = Expand(strings, 100); + + for (auto _ : state) { + for (const auto& s : lookups) { + total += InlinedNullLookup(s); + } + } + benchmark::DoNotOptimize(total); + state.SetItemsProcessed(state.iterations() * lookups.size()); +} + +static void BM_TrieLookupFound(benchmark::State& state) { // NOLINT non-const reference + BenchmarkTrieLookups(state, {"N/A", "null", "-1.#IND", "N/A"}); +} + +static void BM_TrieLookupNotFound( + benchmark::State& state) { // NOLINT non-const reference + BenchmarkTrieLookups(state, {"None", "1.0", "", "abc"}); +} + +static void BM_InlinedTrieLookupFound( + benchmark::State& state) { // NOLINT non-const reference + BenchmarkInlinedTrieLookups(state, {"N/A", "null", "-1.#IND", "N/A"}); +} + +static void BM_InlinedTrieLookupNotFound( + benchmark::State& state) { // NOLINT non-const reference + BenchmarkInlinedTrieLookups(state, {"None", "1.0", "", "abc"}); +} + +static const int kRepetitions = 2; + +BENCHMARK(BM_TrieLookupFound)->Repetitions(kRepetitions); +BENCHMARK(BM_TrieLookupNotFound)->Repetitions(kRepetitions); +BENCHMARK(BM_InlinedTrieLookupFound)->Repetitions(kRepetitions); +BENCHMARK(BM_InlinedTrieLookupNotFound)->Repetitions(kRepetitions); + +} // namespace internal +} // namespace arrow diff --git a/cpp/src/arrow/util/trie-test.cc b/cpp/src/arrow/util/trie-test.cc new file mode 100644 index 0000000000000..33eefa9d9335f --- /dev/null +++ b/cpp/src/arrow/util/trie-test.cc @@ -0,0 +1,283 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "arrow/test-util.h" +#include "arrow/util/trie.h" + +namespace arrow { +namespace internal { + +TEST(SmallString, Basics) { + using SS = SmallString<5>; + { + SS s; + ASSERT_EQ(s.length(), 0); + ASSERT_EQ(util::string_view(s), util::string_view("")); + ASSERT_EQ(s, ""); + ASSERT_NE(s, "x"); + ASSERT_EQ(sizeof(s), 6); + } + { + SS s("abc"); + ASSERT_EQ(s.length(), 3); + ASSERT_EQ(util::string_view(s), util::string_view("abc")); + ASSERT_EQ(std::memcmp(s.data(), "abc", 3), 0); + ASSERT_EQ(s, "abc"); + ASSERT_NE(s, "ab"); + } +} + +TEST(SmallString, Assign) { + using SS = SmallString<5>; + auto s = SS(); + + s = util::string_view("abc"); + ASSERT_EQ(s.length(), 3); + ASSERT_EQ(util::string_view(s), util::string_view("abc")); + ASSERT_EQ(std::memcmp(s.data(), "abc", 3), 0); + ASSERT_EQ(s, "abc"); + ASSERT_NE(s, "ab"); + + s = std::string("ghijk"); + ASSERT_EQ(s.length(), 5); + ASSERT_EQ(util::string_view(s), util::string_view("ghijk")); + ASSERT_EQ(std::memcmp(s.data(), "ghijk", 5), 0); + ASSERT_EQ(s, "ghijk"); + ASSERT_NE(s, ""); + + s = SS("xy"); + ASSERT_EQ(s.length(), 2); + ASSERT_EQ(util::string_view(s), util::string_view("xy")); + ASSERT_EQ(std::memcmp(s.data(), "xy", 2), 0); + ASSERT_EQ(s, "xy"); + ASSERT_NE(s, "xyz"); +} + +TEST(SmallString, Substr) { + using SS = SmallString<5>; + { + auto s = SS(); + ASSERT_EQ(s.substr(0), ""); + ASSERT_EQ(s.substr(0, 2), ""); + } + { + auto s = SS("abcd"); + ASSERT_EQ(s.substr(0), "abcd"); + ASSERT_EQ(s.substr(1), "bcd"); + ASSERT_EQ(s.substr(4), ""); + ASSERT_EQ(s.substr(0, 0), ""); + ASSERT_EQ(s.substr(0, 3), "abc"); + ASSERT_EQ(s.substr(0, 4), "abcd"); + ASSERT_EQ(s.substr(1, 0), ""); + ASSERT_EQ(s.substr(1, 2), "bc"); + ASSERT_EQ(s.substr(4, 0), ""); + ASSERT_EQ(s.substr(4, 1), ""); + } +} + +static std::vector AllNulls() { + return {"#N/A", "#N/A N/A", "#NA", "-1.#IND", "-1.#QNAN", "-NaN", "-nan", "1.#IND", + "1.#QNAN", "N/A", "NA", "NULL", "NaN", "n/a", "nan", "null"}; +} + +static void TestTrieContents(const Trie& trie, const std::vector& entries) { + std::unordered_map control; + auto n_entries = static_cast(entries.size()); + + // Build control container + for (int32_t i = 0; i < n_entries; ++i) { + auto p = control.insert({entries[i], i}); + ASSERT_TRUE(p.second); + } + + // Check all existing entries in trie + for (int32_t i = 0; i < n_entries; ++i) { + ASSERT_EQ(i, trie.Find(entries[i])) << "for string '" << entries[i] << "'"; + } + + auto CheckNotExists = [&control, &trie](const std::string& s) { + auto p = control.find(s); + if (p == control.end()) { + ASSERT_EQ(-1, trie.Find(s)) << "for string '" << s << "'"; + } + }; + + // Check potentially non-existing strings + CheckNotExists(""); + CheckNotExists("X"); + CheckNotExists("abcdefxxxxxxxxxxxxxxx"); + + // Check potentially non-existing variations of existing entries + for (const auto& e : entries) { + CheckNotExists(e + "X"); + if (e.size() > 0) { + CheckNotExists(e.substr(0, 1)); + auto prefix = e.substr(0, e.size() - 1); + CheckNotExists(prefix); + CheckNotExists(prefix 
+ "X"); + auto split_at = e.size() / 2; + CheckNotExists(e.substr(0, split_at) + 'x' + e.substr(split_at + 1)); + } + } +} + +static void TestTrieContents(const std::vector& entries) { + TrieBuilder builder; + for (const auto& s : entries) { + ASSERT_OK(builder.Append(s)); + } + const Trie trie = builder.Finish(); + ASSERT_OK(trie.Validate()); + + TestTrieContents(trie, entries); +} + +TEST(Trie, Empty) { + TrieBuilder builder; + const Trie trie = builder.Finish(); + ASSERT_OK(trie.Validate()); + + ASSERT_EQ(-1, trie.Find("")); + ASSERT_EQ(-1, trie.Find("x")); +} + +TEST(Trie, EmptyString) { + TrieBuilder builder; + ASSERT_OK(builder.Append("")); + const Trie trie = builder.Finish(); + ASSERT_OK(trie.Validate()); + + ASSERT_EQ(0, trie.Find("")); + ASSERT_EQ(-1, trie.Find("x")); +} + +TEST(Trie, Basics1) { + TestTrieContents({"abc", "de", "f"}); + TestTrieContents({"abc", "de", "f", ""}); +} + +TEST(Trie, Basics2) { + TestTrieContents({"a", "abc", "abcd", "abcdef"}); + TestTrieContents({"", "a", "abc", "abcd", "abcdef"}); +} + +TEST(Trie, Basics3) { + TestTrieContents({"abcd", "ab", "a"}); + TestTrieContents({"abcd", "ab", "a", ""}); +} + +TEST(Trie, LongStrings) { + TestTrieContents({"abcdefghijklmnopqr", "abcdefghijklmnoprq", "defghijklmnopqrst"}); + TestTrieContents({"abcdefghijklmnopqr", "abcdefghijklmnoprq", "abcde"}); +} + +TEST(Trie, NullChars) { + const std::string empty; + const std::string nul(1, '\x00'); + std::string a, b, c, d; + a = "x" + nul + "y"; + b = "x" + nul + "z"; + c = nul + "y"; + d = nul; + ASSERT_EQ(a.length(), 3); + ASSERT_EQ(d.length(), 1); + + TestTrieContents({a, b, c, d}); + TestTrieContents({a, b, c}); + TestTrieContents({a, b, c, d, ""}); + TestTrieContents({a, b, c, ""}); + TestTrieContents({d, c, b, a}); + TestTrieContents({c, b, a}); + TestTrieContents({d, c, b, a, ""}); + TestTrieContents({c, b, a, ""}); +} + +TEST(Trie, NegativeChars) { + // Test with characters >= 0x80 (to check the absence of sign issues) + TestTrieContents({"\x7f\x80\x81\xff", "\x7f\x80\x81", "\x7f\xff\x81", "\xff\x80\x81"}); +} + +TEST(Trie, CSVNulls) { TestTrieContents(AllNulls()); } + +TEST(Trie, Duplicates) { + { + TrieBuilder builder; + ASSERT_OK(builder.Append("ab")); + ASSERT_OK(builder.Append("abc")); + ASSERT_RAISES(Invalid, builder.Append("abc")); + ASSERT_OK(builder.Append("abcd")); + ASSERT_RAISES(Invalid, builder.Append("ab")); + ASSERT_OK(builder.Append("abcde")); + const Trie trie = builder.Finish(); + + TestTrieContents(trie, {"ab", "abc", "abcd", "abcde"}); + } + { + // With allow_duplicates = true + TrieBuilder builder; + ASSERT_OK(builder.Append("ab", true)); + ASSERT_OK(builder.Append("abc", true)); + ASSERT_OK(builder.Append("abc", true)); + ASSERT_OK(builder.Append("abcd", true)); + ASSERT_OK(builder.Append("ab", true)); + ASSERT_OK(builder.Append("abcde", true)); + const Trie trie = builder.Finish(); + + TestTrieContents(trie, {"ab", "abc", "abcd", "abcde"}); + } +} + +TEST(Trie, CapacityError) { + // A trie uses 16-bit indices into various internal structures and + // therefore has limited size available. 
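+  // (Reviewer annotation, not in the original patch: index_type is int16_t,
+  //  so node and lookup-table indices top out at 32767; appending up to
+  //  124 * 124 * 124 distinct 3-byte keys below must therefore eventually
+  //  yield a CapacityError, which the final assertion checks.)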
+ TrieBuilder builder; + uint8_t first, second, third; + bool had_capacity_error = false; + uint8_t s[] = "\x00\x00\x00\x00"; + + for (first = 1; first < 125; ++first) { + s[0] = first; + for (second = 1; second < 125; ++second) { + s[1] = second; + for (third = 1; third < 125; ++third) { + s[2] = third; + auto st = builder.Append(reinterpret_cast(s)); + if (st.IsCapacityError()) { + DCHECK_GE(first, 2); + had_capacity_error = true; + break; + } else { + ASSERT_OK(st); + } + } + } + } + ASSERT_TRUE(had_capacity_error) << "Should have produced CapacityError"; +} + +} // namespace internal +} // namespace arrow diff --git a/cpp/src/arrow/util/trie.cc b/cpp/src/arrow/util/trie.cc new file mode 100644 index 0000000000000..eaa02b7c5352e --- /dev/null +++ b/cpp/src/arrow/util/trie.cc @@ -0,0 +1,209 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/util/trie.h" + +#include +#include + +#include "arrow/util/logging.h" + +namespace arrow { +namespace internal { + +Status Trie::Validate() const { + const auto n_nodes = static_cast(nodes_.size()); + if (size_ > n_nodes) { + return Status::Invalid("Number of entries larger than number of nodes"); + } + for (const auto& node : nodes_) { + if (node.found_index_ >= size_) { + return Status::Invalid("Found index >= size"); + } + if (node.child_lookup_ != -1 && + node.child_lookup_ * 256 > + static_cast(lookup_table_.size() - 256)) { + return Status::Invalid("Child lookup base doesn't point to 256 valid indices"); + } + } + for (const auto index : lookup_table_) { + if (index >= n_nodes) { + return Status::Invalid("Child lookup index out of bounds"); + } + } + return Status::OK(); +} + +void Trie::Dump(const Node* node, const std::string& indent) const { + std::cerr << "[\"" << node->substring_ << "\"]"; + if (node->found_index_ >= 0) { + std::cerr << " *"; + } + std::cerr << "\n"; + if (node->child_lookup_ >= 0) { + auto child_indent = indent + " "; + std::cerr << child_indent << "|\n"; + for (fast_index_type i = 0; i < 256; ++i) { + auto child_index = lookup_table_[node->child_lookup_ * 256 + i]; + if (child_index >= 0) { + const Node* child = &nodes_[child_index]; + std::cerr << child_indent << "|-> '" << static_cast(i) << "' (" << i + << ") -> "; + Dump(child, child_indent); + } + } + } +} + +void Trie::Dump() const { Dump(&nodes_[0], ""); } + +TrieBuilder::TrieBuilder() { trie_.nodes_.push_back(Trie::Node{-1, -1, ""}); } + +Status TrieBuilder::AppendChildNode(Trie::Node* parent, uint8_t ch, Trie::Node&& node) { + if (parent->child_lookup_ == -1) { + RETURN_NOT_OK(ExtendLookupTable(&parent->child_lookup_)); + } + auto parent_lookup = parent->child_lookup_ * 256 + ch; + + DCHECK_EQ(trie_.lookup_table_[parent_lookup], -1); + if (trie_.nodes_.size() >= static_cast(kMaxIndex)) { + return 
Status::CapacityError("Trie out of bounds"); + } + trie_.nodes_.push_back(std::move(node)); + trie_.lookup_table_[parent_lookup] = static_cast(trie_.nodes_.size() - 1); + return Status::OK(); +} + +Status TrieBuilder::CreateChildNode(Trie::Node* parent, uint8_t ch, + util::string_view substring) { + const auto kMaxSubstringLength = Trie::kMaxSubstringLength; + + while (substring.length() > kMaxSubstringLength) { + // Substring doesn't fit in node => create intermediate node + auto mid_node = Trie::Node{-1, -1, substring.substr(0, kMaxSubstringLength)}; + RETURN_NOT_OK(AppendChildNode(parent, ch, std::move(mid_node))); + // Recurse + parent = &trie_.nodes_.back(); + ch = static_cast(substring[kMaxSubstringLength]); + substring = substring.substr(kMaxSubstringLength + 1); + } + + // Create final matching node + auto child_node = Trie::Node{trie_.size_, -1, substring}; + RETURN_NOT_OK(AppendChildNode(parent, ch, std::move(child_node))); + ++trie_.size_; + return Status::OK(); +} + +Status TrieBuilder::CreateChildNode(Trie::Node* parent, char ch, + util::string_view substring) { + return CreateChildNode(parent, static_cast(ch), substring); +} + +Status TrieBuilder::ExtendLookupTable(index_type* out_index) { + auto cur_size = trie_.lookup_table_.size(); + auto cur_index = cur_size / 256; + if (cur_index > static_cast(kMaxIndex)) { + return Status::CapacityError("Trie out of bounds"); + } + trie_.lookup_table_.resize(cur_size + 256, -1); + *out_index = static_cast(cur_index); + return Status::OK(); +} + +Status TrieBuilder::SplitNode(fast_index_type node_index, fast_index_type split_at) { + Trie::Node* node = &trie_.nodes_[node_index]; + + DCHECK_LT(split_at, node->substring_length()); + + // Before: + // {node} -> [...] + // After: + // {node} -> [c] -> {out_node} -> [...] 
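+  // (Reviewer annotation, not in the original patch: e.g. splitting a node
+  //  holding "abcd" at split_at = 2 keeps "ab" in the parent, reuses 'c' as
+  //  the child-lookup byte, and moves "d" together with the old found_index_
+  //  and child_lookup_ into the new child node.)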
+ auto child_node = Trie::Node{node->found_index_, node->child_lookup_, + node->substring_.substr(split_at + 1)}; + auto ch = node->substring_[split_at]; + node->child_lookup_ = -1; + node->found_index_ = -1; + node->substring_ = node->substring_.substr(0, split_at); + RETURN_NOT_OK(AppendChildNode(node, ch, std::move(child_node))); + + return Status::OK(); +} + +Status TrieBuilder::Append(util::string_view s, bool allow_duplicate) { + // Find or create node for string + fast_index_type node_index = 0; + fast_index_type pos = 0; + fast_index_type remaining = static_cast(s.length()); + + while (true) { + Trie::Node* node = &trie_.nodes_[node_index]; + const auto substring_length = node->substring_length(); + const auto substring_data = node->substring_data(); + + for (fast_index_type i = 0; i < substring_length; ++i) { + if (remaining == 0) { + // New string too short => need to split node + RETURN_NOT_OK(SplitNode(node_index, i)); + // Current node matches exactly + node = &trie_.nodes_[node_index]; + node->found_index_ = trie_.size_++; + return Status::OK(); + } + if (s[pos] != substring_data[i]) { + // Mismatching substring => need to split node + RETURN_NOT_OK(SplitNode(node_index, i)); + // Create new node for mismatching char + node = &trie_.nodes_[node_index]; + return CreateChildNode(node, s[pos], s.substr(pos + 1)); + } + ++pos; + --remaining; + } + if (remaining == 0) { + // Node matches exactly + if (node->found_index_ >= 0) { + if (allow_duplicate) { + return Status::OK(); + } else { + return Status::Invalid("Duplicate entry in trie"); + } + } + node->found_index_ = trie_.size_++; + return Status::OK(); + } + // Lookup child using next input character + if (node->child_lookup_ == -1) { + // Need to extend lookup table for this node + RETURN_NOT_OK(ExtendLookupTable(&node->child_lookup_)); + } + auto c = static_cast(s[pos++]); + --remaining; + node_index = trie_.lookup_table_[node->child_lookup_ * 256 + c]; + if (node_index == -1) { + // Child not found => need to create child node + return CreateChildNode(node, c, s.substr(pos)); + } + node = &trie_.nodes_[node_index]; + } +} + +Trie TrieBuilder::Finish() { return std::move(trie_); } + +} // namespace internal +} // namespace arrow diff --git a/cpp/src/arrow/util/trie.h b/cpp/src/arrow/util/trie.h new file mode 100644 index 0000000000000..3e82bfd8ee28f --- /dev/null +++ b/cpp/src/arrow/util/trie.h @@ -0,0 +1,245 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
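// Reviewer note, not part of this patch: the header below stores the trie as
// a flat vector of 16-byte nodes plus a lookup table made of 256-entry spans;
// the child for byte c lives at lookup_table_[node.child_lookup_ * 256 + c],
// with -1 meaning "no child".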
+
+#ifndef ARROW_UTIL_TRIE_H
+#define ARROW_UTIL_TRIE_H
+
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include <limits>
+#include <string>
+#include <vector>
+
+#include "arrow/status.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace internal {
+
+// A non-zero-terminated small string class.
+// std::string usually has a small string optimization
+// (see review at https://shaharmike.com/cpp/std-string/)
+// but this one allows tight control and optimization of memory layout.
+template <size_t N>
+class SmallString {
+ public:
+  SmallString() : length_(0) {}
+
+  template <typename T>
+  SmallString(const T& v) {  // NOLINT implicit constructor
+    *this = util::string_view(v);
+  }
+
+  SmallString& operator=(const util::string_view s) {
+#ifndef NDEBUG
+    CheckSize(s.size());
+#endif
+    length_ = static_cast<uint8_t>(s.size());
+    std::memcpy(data_, s.data(), length_);
+    return *this;
+  }
+
+  SmallString& operator=(const std::string& s) {
+    *this = util::string_view(s);
+    return *this;
+  }
+
+  SmallString& operator=(const char* s) {
+    *this = util::string_view(s);
+    return *this;
+  }
+
+  explicit operator util::string_view() const {
+    return util::string_view(data_, length_);
+  }
+
+  const char* data() const { return data_; }
+  size_t length() const { return length_; }
+  bool empty() const { return length_ == 0; }
+  char operator[](size_t pos) const {
+#ifndef NDEBUG
+    assert(pos <= length_);
+#endif
+    return data_[pos];
+  }
+
+  SmallString substr(size_t pos) const {
+    return SmallString(util::string_view(*this).substr(pos));
+  }
+
+  SmallString substr(size_t pos, size_t count) const {
+    return SmallString(util::string_view(*this).substr(pos, count));
+  }
+
+  template <typename T>
+  bool operator==(T&& other) const {
+    return util::string_view(*this) == util::string_view(std::forward<T>(other));
+  }
+
+  template <typename T>
+  bool operator!=(T&& other) const {
+    return util::string_view(*this) != util::string_view(std::forward<T>(other));
+  }
+
+ protected:
+  uint8_t length_;
+  char data_[N];
+
+#ifndef NDEBUG
+  void CheckSize(size_t n) { assert(n <= N); }
+#endif
+};
+
+template <size_t N>
+std::ostream& operator<<(std::ostream& os, const SmallString<N>& str) {
+  return os << util::string_view(str);
+}
+
+// A trie class for byte strings, optimized for small sets of short strings.
+// This class is immutable by design, use a TrieBuilder to construct it.
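// Reviewer sketch, not part of this patch: building and querying a Trie with
// the API declared below (indices follow insertion order, -1 means absent),
// mirroring the usage in trie-test.cc:
//
//   TrieBuilder builder;
//   Status st = builder.Append("null");
//   st = builder.Append("NaN");
//   Trie trie = builder.Finish();
//   int32_t a = trie.Find("null");  // 0
//   int32_t b = trie.Find("none");  // -1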
+class ARROW_EXPORT Trie { + using index_type = int16_t; + using fast_index_type = int_fast16_t; + + public: + Trie() : size_(0) {} + Trie(Trie&&) = default; + Trie& operator=(Trie&&) = default; + + int32_t Find(util::string_view s) const { + const Node* node = &nodes_[0]; + fast_index_type pos = 0; + fast_index_type remaining = static_cast(s.length()); + + while (remaining > 0) { + auto substring_length = node->substring_length(); + if (substring_length > 0) { + auto substring_data = node->substring_data(); + if (remaining < substring_length) { + // Input too short + return -1; + } + for (fast_index_type i = 0; i < substring_length; ++i) { + if (s[pos++] != substring_data[i]) { + // Mismatching substring + return -1; + } + --remaining; + } + if (remaining == 0) { + // Matched node exactly + return node->found_index_; + } + } + // Lookup child using next input character + if (node->child_lookup_ == -1) { + // Input too long + return -1; + } + auto c = static_cast(s[pos++]); + --remaining; + auto child_index = lookup_table_[node->child_lookup_ * 256 + c]; + if (child_index == -1) { + // Child not found + return -1; + } + node = &nodes_[child_index]; + } + + // Input exhausted + if (node->substring_.empty()) { + // Matched node exactly + return node->found_index_; + } else { + return -1; + } + } + + Status Validate() const; + + void Dump() const; + + protected: + static constexpr size_t kNodeSize = 16; + static constexpr auto kMaxSubstringLength = + kNodeSize - 2 * sizeof(index_type) - sizeof(int8_t); + + struct Node { + // If this node is a valid end of string, index of found string, otherwise -1 + index_type found_index_; + // Base index for child lookup in lookup_table_ (-1 if no child nodes) + index_type child_lookup_; + // The substring for this node. 
+ SmallString substring_; + + fast_index_type substring_length() const { + return static_cast(substring_.length()); + } + const char* substring_data() const { return substring_.data(); } + }; + + static_assert(sizeof(Node) == kNodeSize, "Unexpected node size"); + + ARROW_DISALLOW_COPY_AND_ASSIGN(Trie); + + void Dump(const Node* node, const std::string& indent) const; + + // Node table: entry 0 is the root node + std::vector nodes_; + + // Indexed lookup structure: gives index in node table, or -1 if not found + std::vector lookup_table_; + + // Number of entries + index_type size_; + + friend class TrieBuilder; +}; + +class ARROW_EXPORT TrieBuilder { + using index_type = Trie::index_type; + using fast_index_type = Trie::fast_index_type; + + public: + TrieBuilder(); + Status Append(util::string_view s, bool allow_duplicate = false); + Trie Finish(); + + protected: + // Extend the lookup table by 256 entries, return the index of the new span + Status ExtendLookupTable(index_type* out_lookup_index); + // Split the node given by the index at the substring index `split_at` + Status SplitNode(fast_index_type node_index, fast_index_type split_at); + // Append an already constructed child node to the parent + Status AppendChildNode(Trie::Node* parent, uint8_t ch, Trie::Node&& node); + // Create a matching child node from this parent + Status CreateChildNode(Trie::Node* parent, uint8_t ch, util::string_view substring); + Status CreateChildNode(Trie::Node* parent, char ch, util::string_view substring); + + Trie trie_; + + static constexpr auto kMaxIndex = std::numeric_limits::max(); +}; + +} // namespace internal +} // namespace arrow + +#endif // ARROW_UTIL_TRIE_H diff --git a/cpp/src/arrow/util/utf8.h b/cpp/src/arrow/util/utf8.h index f5a18be05a92f..072c2188f7081 100644 --- a/cpp/src/arrow/util/utf8.h +++ b/cpp/src/arrow/util/utf8.h @@ -24,6 +24,7 @@ #include #include "arrow/util/macros.h" +#include "arrow/util/string_view.h" #include "arrow/util/visibility.h" namespace arrow { @@ -157,6 +158,13 @@ inline bool ValidateUTF8(const uint8_t* data, int64_t size) { return ARROW_PREDICT_TRUE(state == internal::kUTF8ValidateAccept); } +inline bool ValidateUTF8(const util::string_view& str) { + const uint8_t* data = reinterpret_cast(str.data()); + const size_t length = str.size(); + + return ValidateUTF8(data, length); +} + } // namespace util } // namespace arrow diff --git a/cpp/src/arrow/util/variant.h b/cpp/src/arrow/util/variant.h index 1aa9aa3732fdf..fecaa5107c660 100644 --- a/cpp/src/arrow/util/variant.h +++ b/cpp/src/arrow/util/variant.h @@ -1,1105 +1,35 @@ -// Copyright (c) MapBox -// All rights reserved. +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at // -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: +// http://www.apache.org/licenses/LICENSE-2.0 // -// - Redistributions of source code must retain the above copyright notice, this -// list of conditions and the following disclaimer. 
-// - Redistributions in binary form must reproduce the above copyright notice, this -// list of conditions and the following disclaimer in the documentation and/or -// other materials provided with the distribution. -// - Neither the name "MapBox" nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -// ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. #ifndef ARROW_UTIL_VARIANT_H #define ARROW_UTIL_VARIANT_H -#include -#include // size_t -#include // operator new -#include // runtime_error -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - - -#ifdef _MSC_VER -// https://msdn.microsoft.com/en-us/library/bw1hbe6y.aspx -# ifdef NDEBUG -# define VARIANT_INLINE __forceinline -# else -# define VARIANT_INLINE //__declspec(noinline) -# endif -#else -# ifdef NDEBUG -# define VARIANT_INLINE //inline __attribute__((always_inline)) -# else -# define VARIANT_INLINE __attribute__((noinline)) -# endif -#endif -// clang-format on - -// Exceptions -#if defined( __EXCEPTIONS) || defined( _MSC_VER) -#define HAS_EXCEPTIONS -#endif - -#define VARIANT_MAJOR_VERSION 1 -#define VARIANT_MINOR_VERSION 1 -#define VARIANT_PATCH_VERSION 0 - -#define VARIANT_VERSION (VARIANT_MAJOR_VERSION * 100000) + (VARIANT_MINOR_VERSION * 100) + (VARIANT_PATCH_VERSION) +#include "arrow/vendored/variant/variant.hpp" // IWYU pragma: export +#include "arrow/vendored/variant/variant_io.hpp" namespace arrow { namespace util { -// XXX This should derive from std::logic_error instead of std::runtime_error. -// See https://github.com/mapbox/variant/issues/48 for details. -class bad_variant_access : public std::runtime_error -{ - -public: - explicit bad_variant_access(const std::string& what_arg) - : runtime_error(what_arg) {} - - explicit bad_variant_access(const char* what_arg) - : runtime_error(what_arg) {} - -}; // class bad_variant_access - -#if !defined(ARROW_VARIANT_MINIMIZE_SIZE) -using type_index_t = std::size_t; -#else -#if defined(ARROW_VARIANT_OPTIMIZE_FOR_SPEED) -using type_index_t = std::uint_fast8_t; -#else -using type_index_t = std::uint_least8_t; -#endif -#endif - -namespace detail { - -static constexpr type_index_t invalid_value = type_index_t(-1); - -template -struct direct_type; - -template -struct direct_type -{ - static constexpr type_index_t index = std::is_same::value - ? 
sizeof...(Types) - : direct_type::index; -}; - -template -struct direct_type -{ - static constexpr type_index_t index = invalid_value; -}; - -#if __cpp_lib_logical_traits >= 201510L - -using std::conjunction; -using std::disjunction; - -#else - -template -struct conjunction : std::true_type {}; - -template -struct conjunction : B1 {}; - -template -struct conjunction : std::conditional::type {}; - -template -struct conjunction : std::conditional, B1>::type {}; - -template -struct disjunction : std::false_type {}; - -template -struct disjunction : B1 {}; - -template -struct disjunction : std::conditional::type {}; - -template -struct disjunction : std::conditional>::type {}; - -#endif - -template -struct convertible_type; - -template -struct convertible_type -{ - static constexpr type_index_t index = std::is_convertible::value - ? disjunction...>::value ? invalid_value : sizeof...(Types) - : convertible_type::index; -}; - -template -struct convertible_type -{ - static constexpr type_index_t index = invalid_value; -}; - -template -struct value_traits -{ - using value_type = typename std::remove_const::type>::type; - using value_type_wrapper = recursive_wrapper; - static constexpr type_index_t direct_index = direct_type::index; - static constexpr bool is_direct = direct_index != invalid_value; - static constexpr type_index_t index_direct_or_wrapper = is_direct ? direct_index : direct_type::index; - static constexpr bool is_direct_or_wrapper = index_direct_or_wrapper != invalid_value; - static constexpr type_index_t index = is_direct_or_wrapper ? index_direct_or_wrapper : convertible_type::index; - static constexpr bool is_valid = index != invalid_value; - static constexpr type_index_t tindex = is_valid ? sizeof...(Types)-index : 0; - using target_type = typename std::tuple_element>::type; -}; - -template -struct enable_if_type -{ - using type = R; -}; - -template -struct result_of_unary_visit -{ - using type = typename std::result_of::type; -}; - -template -struct result_of_unary_visit::type> -{ - using type = typename F::result_type; -}; - -template -struct result_of_binary_visit -{ - using type = typename std::result_of::type; -}; - -template -struct result_of_binary_visit::type> -{ - using type = typename F::result_type; -}; - -template -struct static_max; - -template -struct static_max -{ - static const type_index_t value = arg; -}; - -template -struct static_max -{ - static const type_index_t value = arg1 >= arg2 ? 
static_max::value : static_max::value; -}; - -template -struct variant_helper; - -template -struct variant_helper -{ - VARIANT_INLINE static void destroy(const type_index_t type_index, void* data) - { - if (type_index == sizeof...(Types)) - { - reinterpret_cast(data)->~T(); - } - else - { - variant_helper::destroy(type_index, data); - } - } - - VARIANT_INLINE static void move(const type_index_t old_type_index, void* old_value, void* new_value) - { - if (old_type_index == sizeof...(Types)) - { - new (new_value) T(std::move(*reinterpret_cast(old_value))); - } - else - { - variant_helper::move(old_type_index, old_value, new_value); - } - } - - VARIANT_INLINE static void copy(const type_index_t old_type_index, const void* old_value, void* new_value) - { - if (old_type_index == sizeof...(Types)) - { - new (new_value) T(*reinterpret_cast(old_value)); - } - else - { - variant_helper::copy(old_type_index, old_value, new_value); - } - } -}; - -template <> -struct variant_helper<> -{ - VARIANT_INLINE static void destroy(const type_index_t, void*) {} - VARIANT_INLINE static void move(const type_index_t, void*, void*) {} - VARIANT_INLINE static void copy(const type_index_t, const void*, void*) {} -}; - -template -struct unwrapper -{ - static T const& apply_const(T const& obj) { return obj; } - static T& apply(T& obj) { return obj; } -}; - -template -struct unwrapper> -{ - static auto apply_const(recursive_wrapper const& obj) - -> typename recursive_wrapper::type const& - { - return obj.get(); - } - static auto apply(recursive_wrapper& obj) - -> typename recursive_wrapper::type& - { - return obj.get(); - } -}; - -template -struct unwrapper> -{ - static auto apply_const(std::reference_wrapper const& obj) - -> typename std::reference_wrapper::type const& - { - return obj.get(); - } - static auto apply(std::reference_wrapper& obj) - -> typename std::reference_wrapper::type& - { - return obj.get(); - } -}; - -template -struct dispatcher; - -template -struct dispatcher -{ - VARIANT_INLINE static R apply_const(V const& v, F&& f) - { - if (v.template is()) - { - return f(unwrapper::apply_const(v.template get_unchecked())); - } - else - { - return dispatcher::apply_const(v, std::forward(f)); - } - } - - VARIANT_INLINE static R apply(V& v, F&& f) - { - if (v.template is()) - { - return f(unwrapper::apply(v.template get_unchecked())); - } - else - { - return dispatcher::apply(v, std::forward(f)); - } - } -}; - -template -struct dispatcher -{ - VARIANT_INLINE static R apply_const(V const& v, F&& f) - { - return f(unwrapper::apply_const(v.template get_unchecked())); - } - - VARIANT_INLINE static R apply(V& v, F&& f) - { - return f(unwrapper::apply(v.template get_unchecked())); - } -}; - -template -struct binary_dispatcher_rhs; - -template -struct binary_dispatcher_rhs -{ - VARIANT_INLINE static R apply_const(V const& lhs, V const& rhs, F&& f) - { - if (rhs.template is()) // call binary functor - { - return f(unwrapper::apply_const(lhs.template get_unchecked()), - unwrapper::apply_const(rhs.template get_unchecked())); - } - else - { - return binary_dispatcher_rhs::apply_const(lhs, rhs, std::forward(f)); - } - } - - VARIANT_INLINE static R apply(V& lhs, V& rhs, F&& f) - { - if (rhs.template is()) // call binary functor - { - return f(unwrapper::apply(lhs.template get_unchecked()), - unwrapper::apply(rhs.template get_unchecked())); - } - else - { - return binary_dispatcher_rhs::apply(lhs, rhs, std::forward(f)); - } - } -}; - -template -struct binary_dispatcher_rhs -{ - VARIANT_INLINE static R apply_const(V const& 
lhs, V const& rhs, F&& f) - { - return f(unwrapper::apply_const(lhs.template get_unchecked()), - unwrapper::apply_const(rhs.template get_unchecked())); - } - - VARIANT_INLINE static R apply(V& lhs, V& rhs, F&& f) - { - return f(unwrapper::apply(lhs.template get_unchecked()), - unwrapper::apply(rhs.template get_unchecked())); - } -}; - -template -struct binary_dispatcher_lhs; - -template -struct binary_dispatcher_lhs -{ - VARIANT_INLINE static R apply_const(V const& lhs, V const& rhs, F&& f) - { - if (lhs.template is()) // call binary functor - { - return f(unwrapper::apply_const(lhs.template get_unchecked()), - unwrapper::apply_const(rhs.template get_unchecked())); - } - else - { - return binary_dispatcher_lhs::apply_const(lhs, rhs, std::forward(f)); - } - } - - VARIANT_INLINE static R apply(V& lhs, V& rhs, F&& f) - { - if (lhs.template is()) // call binary functor - { - return f(unwrapper::apply(lhs.template get_unchecked()), - unwrapper::apply(rhs.template get_unchecked())); - } - else - { - return binary_dispatcher_lhs::apply(lhs, rhs, std::forward(f)); - } - } -}; - -template -struct binary_dispatcher_lhs -{ - VARIANT_INLINE static R apply_const(V const& lhs, V const& rhs, F&& f) - { - return f(unwrapper::apply_const(lhs.template get_unchecked()), - unwrapper::apply_const(rhs.template get_unchecked())); - } - - VARIANT_INLINE static R apply(V& lhs, V& rhs, F&& f) - { - return f(unwrapper::apply(lhs.template get_unchecked()), - unwrapper::apply(rhs.template get_unchecked())); - } -}; - -template -struct binary_dispatcher; - -template -struct binary_dispatcher -{ - VARIANT_INLINE static R apply_const(V const& v0, V const& v1, F&& f) - { - if (v0.template is()) - { - if (v1.template is()) - { - return f(unwrapper::apply_const(v0.template get_unchecked()), - unwrapper::apply_const(v1.template get_unchecked())); // call binary functor - } - else - { - return binary_dispatcher_rhs::apply_const(v0, v1, std::forward(f)); - } - } - else if (v1.template is()) - { - return binary_dispatcher_lhs::apply_const(v0, v1, std::forward(f)); - } - return binary_dispatcher::apply_const(v0, v1, std::forward(f)); - } - - VARIANT_INLINE static R apply(V& v0, V& v1, F&& f) - { - if (v0.template is()) - { - if (v1.template is()) - { - return f(unwrapper::apply(v0.template get_unchecked()), - unwrapper::apply(v1.template get_unchecked())); // call binary functor - } - else - { - return binary_dispatcher_rhs::apply(v0, v1, std::forward(f)); - } - } - else if (v1.template is()) - { - return binary_dispatcher_lhs::apply(v0, v1, std::forward(f)); - } - return binary_dispatcher::apply(v0, v1, std::forward(f)); - } -}; - -template -struct binary_dispatcher -{ - VARIANT_INLINE static R apply_const(V const& v0, V const& v1, F&& f) - { - return f(unwrapper::apply_const(v0.template get_unchecked()), - unwrapper::apply_const(v1.template get_unchecked())); // call binary functor - } - - VARIANT_INLINE static R apply(V& v0, V& v1, F&& f) - { - return f(unwrapper::apply(v0.template get_unchecked()), - unwrapper::apply(v1.template get_unchecked())); // call binary functor - } -}; - -// comparator functors -struct equal_comp -{ - template - bool operator()(T const& lhs, T const& rhs) const - { - return lhs == rhs; - } -}; - -struct less_comp -{ - template - bool operator()(T const& lhs, T const& rhs) const - { - return lhs < rhs; - } -}; - -template -class comparer -{ -public: - explicit comparer(Variant const& lhs) noexcept - : lhs_(lhs) {} - comparer& operator=(comparer const&) = delete; - // visitor - template - bool 
operator()(T const& rhs_content) const - { - T const& lhs_content = lhs_.template get_unchecked(); - return Comp()(lhs_content, rhs_content); - } - -private: - Variant const& lhs_; -}; - -// hashing visitor -struct hasher -{ - template - std::size_t operator()(const T& hashable) const - { - return std::hash{}(hashable); - } -}; - -} // namespace detail - -struct no_init {}; - -template -class variant -{ - static_assert(sizeof...(Types) > 0, "Template parameter type list of variant can not be empty."); - static_assert(!detail::disjunction...>::value, "Variant can not hold reference types. Maybe use std::reference_wrapper?"); - static_assert(!detail::disjunction...>::value, "Variant can not hold array types."); - static_assert(sizeof...(Types) < std::numeric_limits::max(), "Internal index type must be able to accommodate all alternatives."); -private: - static const std::size_t data_size = detail::static_max::value; - static const std::size_t data_align = detail::static_max::value; -public: - struct adapted_variant_tag; - using types = std::tuple; -private: - using first_type = typename std::tuple_element<0, types>::type; - using data_type = typename std::aligned_storage::type; - using helper_type = detail::variant_helper; - - type_index_t type_index; - data_type data; - -public: - VARIANT_INLINE variant() noexcept(std::is_nothrow_default_constructible::value) - : type_index(sizeof...(Types)-1) - { - static_assert(std::is_default_constructible::value, "First type in variant must be default constructible to allow default construction of variant."); - new (&data) first_type(); - } - - VARIANT_INLINE variant(no_init) noexcept - : type_index(detail::invalid_value) {} - - // http://isocpp.org/blog/2012/11/universal-references-in-c11-scott-meyers - template , - typename Enable = typename std::enable_if, typename Traits::value_type>::value>::type > - VARIANT_INLINE variant(T&& val) noexcept(std::is_nothrow_constructible::value) - : type_index(Traits::index) - { - new (&data) typename Traits::target_type(std::forward(val)); - } - - VARIANT_INLINE variant(variant const& old) - : type_index(old.type_index) - { - helper_type::copy(old.type_index, &old.data, &data); - } - - VARIANT_INLINE variant(variant&& old) - noexcept(detail::conjunction...>::value) - : type_index(old.type_index) - { - helper_type::move(old.type_index, &old.data, &data); - } - -private: - VARIANT_INLINE void copy_assign(variant const& rhs) - { - helper_type::destroy(type_index, &data); - type_index = detail::invalid_value; - helper_type::copy(rhs.type_index, &rhs.data, &data); - type_index = rhs.type_index; - } - - VARIANT_INLINE void move_assign(variant&& rhs) - { - helper_type::destroy(type_index, &data); - type_index = detail::invalid_value; - helper_type::move(rhs.type_index, &rhs.data, &data); - type_index = rhs.type_index; - } - -public: - VARIANT_INLINE variant& operator=(variant&& other) - { - move_assign(std::move(other)); - return *this; - } - - VARIANT_INLINE variant& operator=(variant const& other) - { - copy_assign(other); - return *this; - } - - // conversions - // move-assign - template - VARIANT_INLINE variant& operator=(T&& rhs) noexcept - { - variant temp(std::forward(rhs)); - move_assign(std::move(temp)); - return *this; - } - - // copy-assign - template - VARIANT_INLINE variant& operator=(T const& rhs) - { - variant temp(rhs); - copy_assign(temp); - return *this; - } - - template ::index != detail::invalid_value)>::type* = NULLPTR> - VARIANT_INLINE bool is() const - { - return type_index == 
detail::direct_type::index; - } - - template , Types...>::index != detail::invalid_value)>::type* = NULLPTR> - VARIANT_INLINE bool is() const - { - return type_index == detail::direct_type, Types...>::index; - } - - VARIANT_INLINE bool valid() const - { - return type_index != detail::invalid_value; - } - - template - VARIANT_INLINE void set(Args&&... args) - { - helper_type::destroy(type_index, &data); - type_index = detail::invalid_value; - new (&data) T(std::forward(args)...); - type_index = detail::direct_type::index; - } - - // get_unchecked() - template ::index != detail::invalid_value)>::type* = NULLPTR> - VARIANT_INLINE T& get_unchecked() - { - return *reinterpret_cast(&data); - } - -#ifdef HAS_EXCEPTIONS - // get() - template ::index != detail::invalid_value)>::type* = NULLPTR> - VARIANT_INLINE T& get() - { - if (type_index == detail::direct_type::index) - { - return *reinterpret_cast(&data); - } - else - { - throw bad_variant_access("in get()"); - } - } -#endif - - template ::index != detail::invalid_value)>::type* = NULLPTR> - VARIANT_INLINE T const& get_unchecked() const - { - return *reinterpret_cast(&data); - } - -#ifdef HAS_EXCEPTIONS - template ::index != detail::invalid_value)>::type* = NULLPTR> - VARIANT_INLINE T const& get() const - { - if (type_index == detail::direct_type::index) - { - return *reinterpret_cast(&data); - } - else - { - throw bad_variant_access("in get()"); - } - } -#endif - - // get_unchecked() - T stored as recursive_wrapper - template , Types...>::index != detail::invalid_value)>::type* = NULLPTR> - VARIANT_INLINE T& get_unchecked() - { - return (*reinterpret_cast*>(&data)).get(); - } - -#ifdef HAS_EXCEPTIONS - // get() - T stored as recursive_wrapper - template , Types...>::index != detail::invalid_value)>::type* = NULLPTR> - VARIANT_INLINE T& get() - { - if (type_index == detail::direct_type, Types...>::index) - { - return (*reinterpret_cast*>(&data)).get(); - } - else - { - throw bad_variant_access("in get()"); - } - } -#endif - - template , Types...>::index != detail::invalid_value)>::type* = NULLPTR> - VARIANT_INLINE T const& get_unchecked() const - { - return (*reinterpret_cast const*>(&data)).get(); - } - -#ifdef HAS_EXCEPTIONS - template , Types...>::index != detail::invalid_value)>::type* = NULLPTR> - VARIANT_INLINE T const& get() const - { - if (type_index == detail::direct_type, Types...>::index) - { - return (*reinterpret_cast const*>(&data)).get(); - } - else - { - throw bad_variant_access("in get()"); - } - } -#endif - - // get_unchecked() - T stored as std::reference_wrapper - template , Types...>::index != detail::invalid_value)>::type* = NULLPTR> - VARIANT_INLINE T& get_unchecked() - { - return (*reinterpret_cast*>(&data)).get(); - } - -#ifdef HAS_EXCEPTIONS - // get() - T stored as std::reference_wrapper - template , Types...>::index != detail::invalid_value)>::type* = NULLPTR> - VARIANT_INLINE T& get() - { - if (type_index == detail::direct_type, Types...>::index) - { - return (*reinterpret_cast*>(&data)).get(); - } - else - { - throw bad_variant_access("in get()"); - } - } -#endif - - template , Types...>::index != detail::invalid_value)>::type* = NULLPTR> - VARIANT_INLINE T const& get_unchecked() const - { - return (*reinterpret_cast const*>(&data)).get(); - } - -#ifdef HAS_EXCEPTIONS - template , Types...>::index != detail::invalid_value)>::type* = NULLPTR> - VARIANT_INLINE T const& get() const - { - if (type_index == detail::direct_type, Types...>::index) - { - return (*reinterpret_cast const*>(&data)).get(); - } - else - { - 
throw bad_variant_access("in get()"); - } - } -#endif - - // This function is deprecated because it returns an internal index field. - // Use which() instead. - ARROW_DEPRECATED("Use which() instead") - VARIANT_INLINE type_index_t get_type_index() const - { - return type_index; - } - - VARIANT_INLINE int which() const noexcept - { - return static_cast(sizeof...(Types) - type_index - 1); - } - - template ::index != detail::invalid_value)>::type* = NULLPTR> - VARIANT_INLINE static constexpr int which() noexcept - { - return static_cast(sizeof...(Types)-detail::direct_type::index - 1); - } - - // visitor - // unary - template ::type> - auto VARIANT_INLINE static visit(V const& v, F&& f) - -> decltype(detail::dispatcher::apply_const(v, std::forward(f))) - { - return detail::dispatcher::apply_const(v, std::forward(f)); - } - // non-const - template ::type> - auto VARIANT_INLINE static visit(V& v, F&& f) - -> decltype(detail::dispatcher::apply(v, std::forward(f))) - { - return detail::dispatcher::apply(v, std::forward(f)); - } - - // binary - // const - template ::type> - auto VARIANT_INLINE static binary_visit(V const& v0, V const& v1, F&& f) - -> decltype(detail::binary_dispatcher::apply_const(v0, v1, std::forward(f))) - { - return detail::binary_dispatcher::apply_const(v0, v1, std::forward(f)); - } - // non-const - template ::type> - auto VARIANT_INLINE static binary_visit(V& v0, V& v1, F&& f) - -> decltype(detail::binary_dispatcher::apply(v0, v1, std::forward(f))) - { - return detail::binary_dispatcher::apply(v0, v1, std::forward(f)); - } - - // match - // unary - template - auto VARIANT_INLINE match(Fs&&... fs) const - -> decltype(variant::visit(*this, ::arrow::util::make_visitor(std::forward(fs)...))) - { - return variant::visit(*this, ::arrow::util::make_visitor(std::forward(fs)...)); - } - // non-const - template - auto VARIANT_INLINE match(Fs&&... 
fs) - -> decltype(variant::visit(*this, ::arrow::util::make_visitor(std::forward(fs)...))) - { - return variant::visit(*this, ::arrow::util::make_visitor(std::forward(fs)...)); - } - - ~variant() noexcept // no-throw destructor - { - helper_type::destroy(type_index, &data); - } - - // comparison operators - // equality - VARIANT_INLINE bool operator==(variant const& rhs) const - { - assert(valid() && rhs.valid()); - if (this->which() != rhs.which()) - { - return false; - } - detail::comparer visitor(*this); - return visit(rhs, visitor); - } - - VARIANT_INLINE bool operator!=(variant const& rhs) const - { - return !(*this == rhs); - } - - // less than - VARIANT_INLINE bool operator<(variant const& rhs) const - { - assert(valid() && rhs.valid()); - if (this->which() != rhs.which()) - { - return this->which() < rhs.which(); - } - detail::comparer visitor(*this); - return visit(rhs, visitor); - } - VARIANT_INLINE bool operator>(variant const& rhs) const - { - return rhs < *this; - } - VARIANT_INLINE bool operator<=(variant const& rhs) const - { - return !(*this > rhs); - } - VARIANT_INLINE bool operator>=(variant const& rhs) const - { - return !(*this < rhs); - } -}; - -// unary visitor interface -// const -template -auto VARIANT_INLINE apply_visitor(F&& f, V const& v) -> decltype(V::visit(v, std::forward(f))) -{ - return V::visit(v, std::forward(f)); -} - -// non-const -template -auto VARIANT_INLINE apply_visitor(F&& f, V& v) -> decltype(V::visit(v, std::forward(f))) -{ - return V::visit(v, std::forward(f)); -} - -// binary visitor interface -// const -template -auto VARIANT_INLINE apply_visitor(F&& f, V const& v0, V const& v1) -> decltype(V::binary_visit(v0, v1, std::forward(f))) -{ - return V::binary_visit(v0, v1, std::forward(f)); -} - -// non-const -template -auto VARIANT_INLINE apply_visitor(F&& f, V& v0, V& v1) -> decltype(V::binary_visit(v0, v1, std::forward(f))) -{ - return V::binary_visit(v0, v1, std::forward(f)); -} - -// getter interface - -#ifdef HAS_EXCEPTIONS -template -auto get(T& var)->decltype(var.template get()) -{ - return var.template get(); -} -#endif - -template -ResultType& get_unchecked(T& var) -{ - return var.template get_unchecked(); -} - -#ifdef HAS_EXCEPTIONS -template -auto get(T const& var)->decltype(var.template get()) -{ - return var.template get(); -} -#endif - -template -ResultType const& get_unchecked(T const& var) -{ - return var.template get_unchecked(); -} -// variant_size -template -struct variant_size; - -//variable templates is c++14 -//template -//constexpr std::size_t variant_size_v = variant_size::value; - -template -struct variant_size - : variant_size {}; - -template -struct variant_size - : variant_size {}; - -template -struct variant_size - : variant_size {}; - -template -struct variant_size> - : std::integral_constant {}; - -// variant_alternative -template -struct variant_alternative; - -#if defined(__clang__) -#if __has_builtin(__type_pack_element) -#define has_type_pack_element -#endif -#endif - -#if defined(has_type_pack_element) -template -struct variant_alternative> -{ - static_assert(sizeof...(Types) > Index , "Index out of range"); - using type = __type_pack_element; -}; -#else -template -struct variant_alternative> - : variant_alternative> -{ - static_assert(sizeof...(Types) > Index -1 , "Index out of range"); -}; - -template -struct variant_alternative<0, variant> -{ - using type = First; -}; - -#endif - -template -using variant_alternative_t = typename variant_alternative::type; - -template -struct variant_alternative - : 
std::add_const<variant_alternative<Index, T>> {};
-
-template <std::size_t Index, typename T>
-struct variant_alternative<Index, volatile T>
-    : std::add_volatile<variant_alternative<Index, T>> {};
-
-template <std::size_t Index, typename T>
-struct variant_alternative<Index, const volatile T>
-    : std::add_cv<variant_alternative<Index, T>> {};
+using mapbox::util::apply_visitor; // seems akin to std::visit
+using mapbox::util::bad_variant_access;
+using mapbox::util::get;
+using mapbox::util::variant;
 
-} // namespace util
-} // namespace arrow
+} // namespace util
+} // namespace arrow
 
-#endif // ARROW_UTIL_VARIANT_H
+#endif // ARROW_UTIL_VARIANT_H
diff --git a/cpp/src/arrow/util/variant/optional.h b/cpp/src/arrow/util/variant/optional.h
deleted file mode 100644
index 4c6671061fe80..0000000000000
--- a/cpp/src/arrow/util/variant/optional.h
+++ /dev/null
@@ -1,100 +0,0 @@
-// Copyright (c) MapBox
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice, this
-//   list of conditions and the following disclaimer.
-// - Redistributions in binary form must reproduce the above copyright notice, this
-//   list of conditions and the following disclaimer in the documentation and/or
-//   other materials provided with the distribution.
-// - Neither the name "MapBox" nor the names of its contributors may be
-//   used to endorse or promote products derived from this software without
-//   specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
-// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-// ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef ARROW_UTIL_VARIANT_OPTIONAL_H
-#define ARROW_UTIL_VARIANT_OPTIONAL_H
-
-#pragma message("This implementation of optional is deprecated. See https://github.com/mapbox/variant/issues/64.")
-
-#include <type_traits>
-#include <utility>
-
-#include <arrow/util/variant.h>
-
-namespace arrow {
-namespace util {
-
-template <typename T>
-class optional
-{
-    static_assert(!std::is_reference<T>::value, "optional doesn't support references");
-
-    struct none_type
-    {
-    };
-
-    variant<none_type, T> variant_;
-
-public:
-    optional() = default;
-
-    optional(optional const& rhs)
-    {
-        if (this != &rhs)
-        { // protect against invalid self-assignment
-            variant_ = rhs.variant_;
-        }
-    }
-
-    optional(T const& v) { variant_ = v; }
-
-    explicit operator bool() const noexcept { return variant_.template is<T>(); }
-
-    T const& get() const { return variant_.template get<T>(); }
-    T& get() { return variant_.template get<T>(); }
-
-    T const& operator*() const { return this->get(); }
-    T operator*() { return this->get(); }
-
-    optional& operator=(T const& v)
-    {
-        variant_ = v;
-        return *this;
-    }
-
-    optional& operator=(optional const& rhs)
-    {
-        if (this != &rhs)
-        {
-            variant_ = rhs.variant_;
-        }
-        return *this;
-    }
-
-    template <typename... Args>
-    void emplace(Args&&... args)
-    {
-        variant_ = T{std::forward<Args>(args)...};
-    }
-
-    void reset() { variant_ = none_type{}; }
-
-}; // class optional
-
-} // namespace util
-} // namespace arrow
-
-#endif // ARROW_UTIL_VARIANT_OPTIONAL_H
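The `optional.h` deleted above layered an option type on top of the variant by reserving a private `none_type` alternative for the empty state. A minimal, self-contained sketch of that design, using C++17 `std::variant` and a hypothetical `simple_optional` name in place of the removed `arrow::util` machinery:

```cpp
#include <cassert>
#include <string>
#include <utility>
#include <variant>

// Hypothetical re-sketch of the deleted class's design: the empty state is
// just another variant alternative.
template <typename T>
class simple_optional {
  struct none_type {};                  // private tag for "no value"
  std::variant<none_type, T> variant_;  // default-constructs to none_type

 public:
  simple_optional() = default;
  simple_optional(T const& v) : variant_(v) {}

  explicit operator bool() const noexcept {
    return std::holds_alternative<T>(variant_);
  }

  T& get() { return std::get<T>(variant_); }

  template <typename... Args>
  void emplace(Args&&... args) {
    // Mirrors the deleted code: construct a T and assign it over the variant.
    variant_ = T{std::forward<Args>(args)...};
  }

  void reset() { variant_ = none_type{}; }
};

int main() {
  simple_optional<std::string> s;
  assert(!s);
  s.emplace("abc");
  assert(s && s.get() == "abc");
  s.reset();
  assert(!s);
}
```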
diff --git a/cpp/src/arrow/util/variant/variant_cast.h b/cpp/src/arrow/util/variant/variant_cast.h
deleted file mode 100644
index 71ae80b5dfab6..0000000000000
--- a/cpp/src/arrow/util/variant/variant_cast.h
+++ /dev/null
@@ -1,114 +0,0 @@
-// Copyright (c) MapBox
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice, this
-//   list of conditions and the following disclaimer.
-// - Redistributions in binary form must reproduce the above copyright notice, this
-//   list of conditions and the following disclaimer in the documentation and/or
-//   other materials provided with the distribution.
-// - Neither the name "MapBox" nor the names of its contributors may be
-//   used to endorse or promote products derived from this software without
-//   specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
-// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-// ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef ARROW_UTIL_VARIANT_CAST_H
-#define ARROW_UTIL_VARIANT_CAST_H
-
-#include <type_traits>
-
-#include "arrow/util/macros.h"
-
-namespace arrow {
-namespace util {
-
-namespace detail {
-
-template <class T>
-class static_caster
-{
-public:
-    template <class V>
-    T& operator()(V& v) const
-    {
-        return static_cast<T&>(v);
-    }
-};
-
-template <class T>
-class dynamic_caster
-{
-public:
-    using result_type = T&;
-    template <class V>
-    T& operator()(V& v, typename std::enable_if<!std::is_polymorphic<V>::value>::type* = NULLPTR) const
-    {
-        throw std::bad_cast();
-    }
-    template <class V>
-    T& operator()(V& v, typename std::enable_if<std::is_polymorphic<V>::value>::type* = NULLPTR) const
-    {
-        return dynamic_cast<T&>(v);
-    }
-};
-
-template <class T>
-class dynamic_caster<T*>
-{
-public:
-    using result_type = T*;
-    template <class V>
-    T* operator()(V& v, typename std::enable_if<!std::is_polymorphic<V>::value>::type* = NULLPTR) const
-    {
-        return NULLPTR;
-    }
-    template <class V>
-    T* operator()(V& v, typename std::enable_if<std::is_polymorphic<V>::value>::type* = NULLPTR) const
-    {
-        return dynamic_cast<T*>(&v);
-    }
-};
-}
-
-template <class T, class V>
-typename detail::dynamic_caster<T>::result_type
-dynamic_variant_cast(V& v)
-{
-    return arrow::util::apply_visitor(detail::dynamic_caster<T>(), v);
-}
-
-template <class T, class V>
-typename detail::dynamic_caster<T>::result_type
-dynamic_variant_cast(const V& v)
-{
-    return arrow::util::apply_visitor(detail::dynamic_caster<T>(), v);
-}
-
-template <class T, class V>
-T& static_variant_cast(V& v)
-{
-    return arrow::util::apply_visitor(detail::static_caster<T>(), v);
-}
-
-template <class T, class V>
-const T& static_variant_cast(const V& v)
-{
-    return arrow::util::apply_visitor(detail::static_caster<T>(), v);
-}
-
-} // namespace util
-} // namespace arrow
-
-#endif // ARROW_UTIL_VARIANT_CAST_H
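The `static_variant_cast` and `dynamic_variant_cast` helpers removed above are thin visitation wrappers: they visit the active alternative and `static_cast`/`dynamic_cast` it to a common (typically polymorphic) base. A rough equivalent of the `static_caster` path, sketched over C++17 `std::variant` with a hypothetical `Shape` hierarchy for illustration:

```cpp
#include <cassert>
#include <variant>

// Hypothetical hierarchy, for illustration only.
struct Shape { virtual ~Shape() = default; virtual int sides() const = 0; };
struct Triangle : Shape { int sides() const override { return 3; } };
struct Square : Shape { int sides() const override { return 4; } };

// Analogue of the deleted static_caster + static_variant_cast: visit the
// active alternative and static_cast it to the requested reference type.
template <typename T, typename V>
T& static_variant_cast(V& v) {
  return std::visit([](auto& alt) -> T& { return static_cast<T&>(alt); }, v);
}

int main() {
  std::variant<Triangle, Square> v = Square{};
  Shape& s = static_variant_cast<Shape>(v);
  assert(s.sides() == 4);
}
```

The deleted `dynamic_caster` additionally dispatched on `std::is_polymorphic`, so that non-polymorphic alternatives throw `std::bad_cast` (or yield a null pointer in the `T*` specialization) rather than failing to compile.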
diff --git a/cpp/src/arrow/util/variant/variant_io.h b/cpp/src/arrow/util/variant/variant_io.h
deleted file mode 100644
index 5541a81f7035f..0000000000000
--- a/cpp/src/arrow/util/variant/variant_io.h
+++ /dev/null
@@ -1,72 +0,0 @@
-// Copyright (c) MapBox
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice, this
-//   list of conditions and the following disclaimer.
-// - Redistributions in binary form must reproduce the above copyright notice, this
-//   list of conditions and the following disclaimer in the documentation and/or
-//   other materials provided with the distribution.
-// - Neither the name "MapBox" nor the names of its contributors may be
-//   used to endorse or promote products derived from this software without
-//   specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
-// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-// ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef ARROW_UTIL_VARIANT_IO_H
-#define ARROW_UTIL_VARIANT_IO_H
-
-#include <iosfwd>
-
-#include <arrow/util/variant.h>
-
-namespace arrow {
-namespace util {
-
-namespace detail {
-// operator<< helper
-template <typename Out>
-class printer
-{
-public:
-    explicit printer(Out& out)
-        : out_(out) {}
-    printer& operator=(printer const&) = delete;
-
-    // visitor
-    template <typename T>
-    void operator()(T const& operand) const
-    {
-        out_ << operand;
-    }
-
-private:
-    Out& out_;
-};
-}
-
-// operator<<
-template <typename CharT, typename Traits, typename... Types>
-VARIANT_INLINE std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& out, variant<Types...> const& rhs)
-{
-    detail::printer<std::basic_ostream<CharT, Traits>> visitor(out);
-    apply_visitor(visitor, rhs);
-    return out;
-}
-
-} // namespace util
-} // namespace arrow
-
-#endif // ARROW_UTIL_VARIANT_IO_H
diff --git a/cpp/src/arrow/util/variant/variant_visitor.h b/cpp/src/arrow/util/variant/variant_visitor.h
deleted file mode 100644
index 66b1dfea3d7c9..0000000000000
--- a/cpp/src/arrow/util/variant/variant_visitor.h
+++ /dev/null
@@ -1,69 +0,0 @@
-// Copyright (c) MapBox
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice, this
-//   list of conditions and the following disclaimer.
-// - Redistributions in binary form must reproduce the above copyright notice, this
-//   list of conditions and the following disclaimer in the documentation and/or
-//   other materials provided with the distribution.
-// - Neither the name "MapBox" nor the names of its contributors may be
-//   used to endorse or promote products derived from this software without
-//   specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
-// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-// ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef ARROW_UTIL_VARIANT_VISITOR_HPP
-#define ARROW_UTIL_VARIANT_VISITOR_HPP
-
-#include <utility>
-
-namespace arrow {
-namespace util {
-
-template <typename... Fns>
-struct visitor;
-
-template <typename Fn>
-struct visitor<Fn> : Fn
-{
-    using Fn::operator();
-
-    template <typename T>
-    visitor(T&& fn) : Fn(std::forward<T>(fn)) {}
-};
-
-template <typename Fn, typename... Fns>
-struct visitor<Fn, Fns...> : Fn, visitor<Fns...>
-{
-    using Fn::operator();
-    using visitor<Fns...>::operator();
-
-    template <typename T, typename... Ts>
-    visitor(T&& fn, Ts&&... fns)
-        : Fn(std::forward<T>(fn))
-        , visitor<Fns...>(std::forward<Ts>(fns)...) {}
-};
-
-template <typename... Fns>
-visitor<typename std::decay<Fns>::type...> make_visitor(Fns&&... fns)
-{
-    return visitor<typename std::decay<Fns>::type...>
-        (std::forward<Fns>(fns)...);
-}
-
-} // namespace util
-} // namespace arrow
-
-#endif // ARROW_UTIL_VARIANT_VISITOR_HPP
diff --git a/cpp/src/arrow/util/visibility.h b/cpp/src/arrow/util/visibility.h
index 34aa752fd2153..b224717a62d19 100644
--- a/cpp/src/arrow/util/visibility.h
+++ b/cpp/src/arrow/util/visibility.h
@@ -43,15 +43,6 @@
 #endif
 #endif // Non-Windows
 
-// gcc and clang disagree about how to handle template visibility when you have
-// explicit specializations https://llvm.org/bugs/show_bug.cgi?id=24815
-
-#if defined(__clang__)
-#define ARROW_EXTERN_TEMPLATE extern template class ARROW_EXPORT
-#else
-#define ARROW_EXTERN_TEMPLATE extern template class
-#endif
-
 // This is a complicated topic, some reading on it:
 // http://www.codesynthesis.com/~boris/blog/2010/01/18/dll-export-cxx-templates/
 #if defined(_MSC_VER) || defined(__clang__)
diff --git a/cpp/src/arrow/vendored/CMakeLists.txt b/cpp/src/arrow/vendored/CMakeLists.txt
new file mode 100644
index 0000000000000..04ea67aa45d04
--- /dev/null
+++ b/cpp/src/arrow/vendored/CMakeLists.txt
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARROW_INSTALL_ALL_HEADERS("arrow/vendored")
+
+add_subdirectory(variant)
diff --git a/cpp/src/arrow/vendored/datetime.h b/cpp/src/arrow/vendored/datetime.h
new file mode 100644
index 0000000000000..424313a5f5d14
--- /dev/null
+++ b/cpp/src/arrow/vendored/datetime.h
@@ -0,0 +1,21 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/vendored/datetime/date.h"
+#include "arrow/vendored/datetime/tz.h"
diff --git a/cpp/src/arrow/vendored/datetime/README.md b/cpp/src/arrow/vendored/datetime/README.md
new file mode 100644
index 0000000000000..ff156ea310095
--- /dev/null
+++ b/cpp/src/arrow/vendored/datetime/README.md
@@ -0,0 +1,21 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements. See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership. The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied. See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# Utilities for supporting date time functions
+
+Sources for datetime are adapted from Howard Hinnant's date library
+(https://github.com/HowardHinnant/date).
+
+Sources are taken from the v2.4.1 release of the above project.
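Since the README is terse, a small usage sketch of the library being vendored may help reviewers. This assumes the vendored header path and the `arrow::util::date` namespace that the new `date.h` below declares, and should be compiled as C++14 or later:

```cpp
#include <iostream>

#include "arrow/vendored/datetime/date.h"  // path added by this change

int main() {
  using namespace arrow::util::date;  // the vendored copy lives here
  // Compose a calendar date with operator/ and convert it to a count of
  // days since the Unix epoch via sys_days.
  constexpr year_month_day ymd = year{2018} / month{11} / day{15};
  constexpr sys_days dp = ymd;  // the conversion is constexpr in C++14 mode
  std::cout << ymd << " = " << dp.time_since_epoch().count()
            << " days since 1970-01-01\n";
}
```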
diff --git a/cpp/src/arrow/vendored/datetime/date.h b/cpp/src/arrow/vendored/datetime/date.h
new file mode 100644
index 0000000000000..f2889e416b054
--- /dev/null
+++ b/cpp/src/arrow/vendored/datetime/date.h
@@ -0,0 +1,8028 @@
+#ifndef DATE_H
+#define DATE_H
+
+// The MIT License (MIT)
+//
+// Copyright (c) 2015, 2016, 2017 Howard Hinnant
+// Copyright (c) 2016 Adrian Colomitchi
+// Copyright (c) 2017 Florian Dang
+// Copyright (c) 2017 Paul Thompson
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+// Our apologies. When the previous paragraph was written, lowercase had not yet
+// been invented (that would involve another several millennia of evolution).
+// We did not mean to shout.
+ +#ifndef HAS_STRING_VIEW +# if __cplusplus >= 201703 +# define HAS_STRING_VIEW 1 +# else +# define HAS_STRING_VIEW 0 +# endif +#endif // HAS_STRING_VIEW + +#include +#include +#include +#include +#include +#if !(__cplusplus >= 201402) +# include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if HAS_STRING_VIEW +# include +#endif +#include +#include + +#ifdef __GNUC__ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wpedantic" +# if __GNUC__ < 5 + // GCC 4.9 Bug 61489 Wrong warning with -Wmissing-field-initializers +# pragma GCC diagnostic ignored "-Wmissing-field-initializers" +# endif +#endif + +namespace arrow +{ +namespace util +{ +namespace date +{ + +//---------------+ +// Configuration | +//---------------+ + +#ifndef ONLY_C_LOCALE +# define ONLY_C_LOCALE 0 +#endif + +#if defined(_MSC_VER) && (!defined(__clang__) || (_MSC_VER < 1910)) +// MSVC +# if _MSC_VER < 1910 +// before VS2017 +# define CONSTDATA const +# define CONSTCD11 +# define CONSTCD14 +# define NOEXCEPT _NOEXCEPT +# else +// VS2017 and later +# define CONSTDATA constexpr const +# define CONSTCD11 constexpr +# define CONSTCD14 constexpr +# define NOEXCEPT noexcept +# endif + +#elif defined(__SUNPRO_CC) && __SUNPRO_CC <= 0x5150 +// Oracle Developer Studio 12.6 and earlier +# define CONSTDATA constexpr const +# define CONSTCD11 constexpr +# define CONSTCD14 +# define NOEXCEPT noexcept + +#elif __cplusplus >= 201402 +// C++14 +# define CONSTDATA constexpr const +# define CONSTCD11 constexpr +# define CONSTCD14 constexpr +# define NOEXCEPT noexcept +#else +// C++11 +# define CONSTDATA constexpr const +# define CONSTCD11 constexpr +# define CONSTCD14 +# define NOEXCEPT noexcept +#endif + +#ifndef HAS_VOID_T +# if __cplusplus >= 201703 +# define HAS_VOID_T 1 +# else +# define HAS_VOID_T 0 +# endif +#endif // HAS_VOID_T + +// Protect from Oracle sun macro +#ifdef sun +# undef sun +#endif + +//-----------+ +// Interface | +//-----------+ + +// durations + +using days = std::chrono::duration + , std::chrono::hours::period>>; + +using weeks = std::chrono::duration + , days::period>>; + +using years = std::chrono::duration + , days::period>>; + +using months = std::chrono::duration + >>; + +// time_point + +template + using sys_time = std::chrono::time_point; + +using sys_days = sys_time; +using sys_seconds = sys_time; + +struct local_t {}; + +template + using local_time = std::chrono::time_point; + +using local_seconds = local_time; +using local_days = local_time; + +// types + +struct last_spec +{ + explicit last_spec() = default; +}; + +class day; +class month; +class year; + +class weekday; +class weekday_indexed; +class weekday_last; + +class month_day; +class month_day_last; +class month_weekday; +class month_weekday_last; + +class year_month; + +class year_month_day; +class year_month_day_last; +class year_month_weekday; +class year_month_weekday_last; + +// date composition operators + +CONSTCD11 year_month operator/(const year& y, const month& m) NOEXCEPT; +CONSTCD11 year_month operator/(const year& y, int m) NOEXCEPT; + +CONSTCD11 month_day operator/(const day& d, const month& m) NOEXCEPT; +CONSTCD11 month_day operator/(const day& d, int m) NOEXCEPT; +CONSTCD11 month_day operator/(const month& m, const day& d) NOEXCEPT; +CONSTCD11 month_day operator/(const month& m, int d) NOEXCEPT; +CONSTCD11 month_day operator/(int m, const day& d) NOEXCEPT; + +CONSTCD11 month_day_last operator/(const month& 
m, last_spec) NOEXCEPT; +CONSTCD11 month_day_last operator/(int m, last_spec) NOEXCEPT; +CONSTCD11 month_day_last operator/(last_spec, const month& m) NOEXCEPT; +CONSTCD11 month_day_last operator/(last_spec, int m) NOEXCEPT; + +CONSTCD11 month_weekday operator/(const month& m, const weekday_indexed& wdi) NOEXCEPT; +CONSTCD11 month_weekday operator/(int m, const weekday_indexed& wdi) NOEXCEPT; +CONSTCD11 month_weekday operator/(const weekday_indexed& wdi, const month& m) NOEXCEPT; +CONSTCD11 month_weekday operator/(const weekday_indexed& wdi, int m) NOEXCEPT; + +CONSTCD11 month_weekday_last operator/(const month& m, const weekday_last& wdl) NOEXCEPT; +CONSTCD11 month_weekday_last operator/(int m, const weekday_last& wdl) NOEXCEPT; +CONSTCD11 month_weekday_last operator/(const weekday_last& wdl, const month& m) NOEXCEPT; +CONSTCD11 month_weekday_last operator/(const weekday_last& wdl, int m) NOEXCEPT; + +CONSTCD11 year_month_day operator/(const year_month& ym, const day& d) NOEXCEPT; +CONSTCD11 year_month_day operator/(const year_month& ym, int d) NOEXCEPT; +CONSTCD11 year_month_day operator/(const year& y, const month_day& md) NOEXCEPT; +CONSTCD11 year_month_day operator/(int y, const month_day& md) NOEXCEPT; +CONSTCD11 year_month_day operator/(const month_day& md, const year& y) NOEXCEPT; +CONSTCD11 year_month_day operator/(const month_day& md, int y) NOEXCEPT; + +CONSTCD11 + year_month_day_last operator/(const year_month& ym, last_spec) NOEXCEPT; +CONSTCD11 + year_month_day_last operator/(const year& y, const month_day_last& mdl) NOEXCEPT; +CONSTCD11 + year_month_day_last operator/(int y, const month_day_last& mdl) NOEXCEPT; +CONSTCD11 + year_month_day_last operator/(const month_day_last& mdl, const year& y) NOEXCEPT; +CONSTCD11 + year_month_day_last operator/(const month_day_last& mdl, int y) NOEXCEPT; + +CONSTCD11 +year_month_weekday +operator/(const year_month& ym, const weekday_indexed& wdi) NOEXCEPT; + +CONSTCD11 +year_month_weekday +operator/(const year& y, const month_weekday& mwd) NOEXCEPT; + +CONSTCD11 +year_month_weekday +operator/(int y, const month_weekday& mwd) NOEXCEPT; + +CONSTCD11 +year_month_weekday +operator/(const month_weekday& mwd, const year& y) NOEXCEPT; + +CONSTCD11 +year_month_weekday +operator/(const month_weekday& mwd, int y) NOEXCEPT; + +CONSTCD11 +year_month_weekday_last +operator/(const year_month& ym, const weekday_last& wdl) NOEXCEPT; + +CONSTCD11 +year_month_weekday_last +operator/(const year& y, const month_weekday_last& mwdl) NOEXCEPT; + +CONSTCD11 +year_month_weekday_last +operator/(int y, const month_weekday_last& mwdl) NOEXCEPT; + +CONSTCD11 +year_month_weekday_last +operator/(const month_weekday_last& mwdl, const year& y) NOEXCEPT; + +CONSTCD11 +year_month_weekday_last +operator/(const month_weekday_last& mwdl, int y) NOEXCEPT; + +// Detailed interface + +// day + +class day +{ + unsigned char d_; + +public: + day() = default; + explicit CONSTCD11 day(unsigned d) NOEXCEPT; + + CONSTCD14 day& operator++() NOEXCEPT; + CONSTCD14 day operator++(int) NOEXCEPT; + CONSTCD14 day& operator--() NOEXCEPT; + CONSTCD14 day operator--(int) NOEXCEPT; + + CONSTCD14 day& operator+=(const days& d) NOEXCEPT; + CONSTCD14 day& operator-=(const days& d) NOEXCEPT; + + CONSTCD11 explicit operator unsigned() const NOEXCEPT; + CONSTCD11 bool ok() const NOEXCEPT; +}; + +CONSTCD11 bool operator==(const day& x, const day& y) NOEXCEPT; +CONSTCD11 bool operator!=(const day& x, const day& y) NOEXCEPT; +CONSTCD11 bool operator< (const day& x, const day& y) NOEXCEPT; +CONSTCD11 bool 
operator> (const day& x, const day& y) NOEXCEPT; +CONSTCD11 bool operator<=(const day& x, const day& y) NOEXCEPT; +CONSTCD11 bool operator>=(const day& x, const day& y) NOEXCEPT; + +CONSTCD11 day operator+(const day& x, const days& y) NOEXCEPT; +CONSTCD11 day operator+(const days& x, const day& y) NOEXCEPT; +CONSTCD11 day operator-(const day& x, const days& y) NOEXCEPT; +CONSTCD11 days operator-(const day& x, const day& y) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const day& d); + +// month + +class month +{ + unsigned char m_; + +public: + month() = default; + explicit CONSTCD11 month(unsigned m) NOEXCEPT; + + CONSTCD14 month& operator++() NOEXCEPT; + CONSTCD14 month operator++(int) NOEXCEPT; + CONSTCD14 month& operator--() NOEXCEPT; + CONSTCD14 month operator--(int) NOEXCEPT; + + CONSTCD14 month& operator+=(const months& m) NOEXCEPT; + CONSTCD14 month& operator-=(const months& m) NOEXCEPT; + + CONSTCD11 explicit operator unsigned() const NOEXCEPT; + CONSTCD11 bool ok() const NOEXCEPT; +}; + +CONSTCD11 bool operator==(const month& x, const month& y) NOEXCEPT; +CONSTCD11 bool operator!=(const month& x, const month& y) NOEXCEPT; +CONSTCD11 bool operator< (const month& x, const month& y) NOEXCEPT; +CONSTCD11 bool operator> (const month& x, const month& y) NOEXCEPT; +CONSTCD11 bool operator<=(const month& x, const month& y) NOEXCEPT; +CONSTCD11 bool operator>=(const month& x, const month& y) NOEXCEPT; + +CONSTCD14 month operator+(const month& x, const months& y) NOEXCEPT; +CONSTCD14 month operator+(const months& x, const month& y) NOEXCEPT; +CONSTCD14 month operator-(const month& x, const months& y) NOEXCEPT; +CONSTCD14 months operator-(const month& x, const month& y) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const month& m); + +// year + +class year +{ + short y_; + +public: + year() = default; + explicit CONSTCD11 year(int y) NOEXCEPT; + + CONSTCD14 year& operator++() NOEXCEPT; + CONSTCD14 year operator++(int) NOEXCEPT; + CONSTCD14 year& operator--() NOEXCEPT; + CONSTCD14 year operator--(int) NOEXCEPT; + + CONSTCD14 year& operator+=(const years& y) NOEXCEPT; + CONSTCD14 year& operator-=(const years& y) NOEXCEPT; + + CONSTCD11 year operator-() const NOEXCEPT; + CONSTCD11 year operator+() const NOEXCEPT; + + CONSTCD11 bool is_leap() const NOEXCEPT; + + CONSTCD11 explicit operator int() const NOEXCEPT; + CONSTCD11 bool ok() const NOEXCEPT; + + static CONSTCD11 year min() NOEXCEPT; + static CONSTCD11 year max() NOEXCEPT; +}; + +CONSTCD11 bool operator==(const year& x, const year& y) NOEXCEPT; +CONSTCD11 bool operator!=(const year& x, const year& y) NOEXCEPT; +CONSTCD11 bool operator< (const year& x, const year& y) NOEXCEPT; +CONSTCD11 bool operator> (const year& x, const year& y) NOEXCEPT; +CONSTCD11 bool operator<=(const year& x, const year& y) NOEXCEPT; +CONSTCD11 bool operator>=(const year& x, const year& y) NOEXCEPT; + +CONSTCD11 year operator+(const year& x, const years& y) NOEXCEPT; +CONSTCD11 year operator+(const years& x, const year& y) NOEXCEPT; +CONSTCD11 year operator-(const year& x, const years& y) NOEXCEPT; +CONSTCD11 years operator-(const year& x, const year& y) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const year& y); + +// weekday + +class weekday +{ + unsigned char wd_; +public: + weekday() = default; + explicit CONSTCD11 weekday(unsigned wd) NOEXCEPT; + CONSTCD11 weekday(const sys_days& dp) NOEXCEPT; + CONSTCD11 explicit weekday(const local_days& dp) NOEXCEPT; + + 
CONSTCD14 weekday& operator++() NOEXCEPT; + CONSTCD14 weekday operator++(int) NOEXCEPT; + CONSTCD14 weekday& operator--() NOEXCEPT; + CONSTCD14 weekday operator--(int) NOEXCEPT; + + CONSTCD14 weekday& operator+=(const days& d) NOEXCEPT; + CONSTCD14 weekday& operator-=(const days& d) NOEXCEPT; + + CONSTCD11 explicit operator unsigned() const NOEXCEPT; + CONSTCD11 bool ok() const NOEXCEPT; + + CONSTCD11 weekday_indexed operator[](unsigned index) const NOEXCEPT; + CONSTCD11 weekday_last operator[](last_spec) const NOEXCEPT; + +private: + static CONSTCD11 unsigned char weekday_from_days(int z) NOEXCEPT; +}; + +CONSTCD11 bool operator==(const weekday& x, const weekday& y) NOEXCEPT; +CONSTCD11 bool operator!=(const weekday& x, const weekday& y) NOEXCEPT; + +CONSTCD14 weekday operator+(const weekday& x, const days& y) NOEXCEPT; +CONSTCD14 weekday operator+(const days& x, const weekday& y) NOEXCEPT; +CONSTCD14 weekday operator-(const weekday& x, const days& y) NOEXCEPT; +CONSTCD14 days operator-(const weekday& x, const weekday& y) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const weekday& wd); + +// weekday_indexed + +class weekday_indexed +{ + unsigned char wd_ : 4; + unsigned char index_ : 4; + +public: + weekday_indexed() = default; + CONSTCD11 weekday_indexed(const date::weekday& wd, unsigned index) NOEXCEPT; + + CONSTCD11 date::weekday weekday() const NOEXCEPT; + CONSTCD11 unsigned index() const NOEXCEPT; + CONSTCD11 bool ok() const NOEXCEPT; +}; + +CONSTCD11 bool operator==(const weekday_indexed& x, const weekday_indexed& y) NOEXCEPT; +CONSTCD11 bool operator!=(const weekday_indexed& x, const weekday_indexed& y) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const weekday_indexed& wdi); + +// weekday_last + +class weekday_last +{ + date::weekday wd_; + +public: + explicit CONSTCD11 weekday_last(const date::weekday& wd) NOEXCEPT; + + CONSTCD11 date::weekday weekday() const NOEXCEPT; + CONSTCD11 bool ok() const NOEXCEPT; +}; + +CONSTCD11 bool operator==(const weekday_last& x, const weekday_last& y) NOEXCEPT; +CONSTCD11 bool operator!=(const weekday_last& x, const weekday_last& y) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const weekday_last& wdl); + +// year_month + +class year_month +{ + date::year y_; + date::month m_; + +public: + year_month() = default; + CONSTCD11 year_month(const date::year& y, const date::month& m) NOEXCEPT; + + CONSTCD11 date::year year() const NOEXCEPT; + CONSTCD11 date::month month() const NOEXCEPT; + + CONSTCD14 year_month& operator+=(const months& dm) NOEXCEPT; + CONSTCD14 year_month& operator-=(const months& dm) NOEXCEPT; + CONSTCD14 year_month& operator+=(const years& dy) NOEXCEPT; + CONSTCD14 year_month& operator-=(const years& dy) NOEXCEPT; + + CONSTCD11 bool ok() const NOEXCEPT; +}; + +CONSTCD11 bool operator==(const year_month& x, const year_month& y) NOEXCEPT; +CONSTCD11 bool operator!=(const year_month& x, const year_month& y) NOEXCEPT; +CONSTCD11 bool operator< (const year_month& x, const year_month& y) NOEXCEPT; +CONSTCD11 bool operator> (const year_month& x, const year_month& y) NOEXCEPT; +CONSTCD11 bool operator<=(const year_month& x, const year_month& y) NOEXCEPT; +CONSTCD11 bool operator>=(const year_month& x, const year_month& y) NOEXCEPT; + +CONSTCD14 year_month operator+(const year_month& ym, const months& dm) NOEXCEPT; +CONSTCD14 year_month operator+(const months& dm, const year_month& ym) NOEXCEPT; +CONSTCD14 year_month operator-(const 
year_month& ym, const months& dm) NOEXCEPT; + +CONSTCD11 months operator-(const year_month& x, const year_month& y) NOEXCEPT; +CONSTCD11 year_month operator+(const year_month& ym, const years& dy) NOEXCEPT; +CONSTCD11 year_month operator+(const years& dy, const year_month& ym) NOEXCEPT; +CONSTCD11 year_month operator-(const year_month& ym, const years& dy) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const year_month& ym); + +// month_day + +class month_day +{ + date::month m_; + date::day d_; + +public: + month_day() = default; + CONSTCD11 month_day(const date::month& m, const date::day& d) NOEXCEPT; + + CONSTCD11 date::month month() const NOEXCEPT; + CONSTCD11 date::day day() const NOEXCEPT; + + CONSTCD14 bool ok() const NOEXCEPT; +}; + +CONSTCD11 bool operator==(const month_day& x, const month_day& y) NOEXCEPT; +CONSTCD11 bool operator!=(const month_day& x, const month_day& y) NOEXCEPT; +CONSTCD11 bool operator< (const month_day& x, const month_day& y) NOEXCEPT; +CONSTCD11 bool operator> (const month_day& x, const month_day& y) NOEXCEPT; +CONSTCD11 bool operator<=(const month_day& x, const month_day& y) NOEXCEPT; +CONSTCD11 bool operator>=(const month_day& x, const month_day& y) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const month_day& md); + +// month_day_last + +class month_day_last +{ + date::month m_; + +public: + CONSTCD11 explicit month_day_last(const date::month& m) NOEXCEPT; + + CONSTCD11 date::month month() const NOEXCEPT; + CONSTCD11 bool ok() const NOEXCEPT; +}; + +CONSTCD11 bool operator==(const month_day_last& x, const month_day_last& y) NOEXCEPT; +CONSTCD11 bool operator!=(const month_day_last& x, const month_day_last& y) NOEXCEPT; +CONSTCD11 bool operator< (const month_day_last& x, const month_day_last& y) NOEXCEPT; +CONSTCD11 bool operator> (const month_day_last& x, const month_day_last& y) NOEXCEPT; +CONSTCD11 bool operator<=(const month_day_last& x, const month_day_last& y) NOEXCEPT; +CONSTCD11 bool operator>=(const month_day_last& x, const month_day_last& y) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const month_day_last& mdl); + +// month_weekday + +class month_weekday +{ + date::month m_; + date::weekday_indexed wdi_; +public: + CONSTCD11 month_weekday(const date::month& m, + const date::weekday_indexed& wdi) NOEXCEPT; + + CONSTCD11 date::month month() const NOEXCEPT; + CONSTCD11 date::weekday_indexed weekday_indexed() const NOEXCEPT; + + CONSTCD11 bool ok() const NOEXCEPT; +}; + +CONSTCD11 bool operator==(const month_weekday& x, const month_weekday& y) NOEXCEPT; +CONSTCD11 bool operator!=(const month_weekday& x, const month_weekday& y) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const month_weekday& mwd); + +// month_weekday_last + +class month_weekday_last +{ + date::month m_; + date::weekday_last wdl_; + +public: + CONSTCD11 month_weekday_last(const date::month& m, + const date::weekday_last& wd) NOEXCEPT; + + CONSTCD11 date::month month() const NOEXCEPT; + CONSTCD11 date::weekday_last weekday_last() const NOEXCEPT; + + CONSTCD11 bool ok() const NOEXCEPT; +}; + +CONSTCD11 + bool operator==(const month_weekday_last& x, const month_weekday_last& y) NOEXCEPT; +CONSTCD11 + bool operator!=(const month_weekday_last& x, const month_weekday_last& y) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const month_weekday_last& mwdl); + +// class year_month_day + +class year_month_day +{ + date::year y_; + 
date::month m_; + date::day d_; + +public: + year_month_day() = default; + CONSTCD11 year_month_day(const date::year& y, const date::month& m, + const date::day& d) NOEXCEPT; + CONSTCD14 year_month_day(const year_month_day_last& ymdl) NOEXCEPT; + + CONSTCD14 year_month_day(sys_days dp) NOEXCEPT; + CONSTCD14 explicit year_month_day(local_days dp) NOEXCEPT; + + CONSTCD14 year_month_day& operator+=(const months& m) NOEXCEPT; + CONSTCD14 year_month_day& operator-=(const months& m) NOEXCEPT; + CONSTCD14 year_month_day& operator+=(const years& y) NOEXCEPT; + CONSTCD14 year_month_day& operator-=(const years& y) NOEXCEPT; + + CONSTCD11 date::year year() const NOEXCEPT; + CONSTCD11 date::month month() const NOEXCEPT; + CONSTCD11 date::day day() const NOEXCEPT; + + CONSTCD14 operator sys_days() const NOEXCEPT; + CONSTCD14 explicit operator local_days() const NOEXCEPT; + CONSTCD14 bool ok() const NOEXCEPT; + +private: + static CONSTCD14 year_month_day from_days(days dp) NOEXCEPT; + CONSTCD14 days to_days() const NOEXCEPT; +}; + +CONSTCD11 bool operator==(const year_month_day& x, const year_month_day& y) NOEXCEPT; +CONSTCD11 bool operator!=(const year_month_day& x, const year_month_day& y) NOEXCEPT; +CONSTCD11 bool operator< (const year_month_day& x, const year_month_day& y) NOEXCEPT; +CONSTCD11 bool operator> (const year_month_day& x, const year_month_day& y) NOEXCEPT; +CONSTCD11 bool operator<=(const year_month_day& x, const year_month_day& y) NOEXCEPT; +CONSTCD11 bool operator>=(const year_month_day& x, const year_month_day& y) NOEXCEPT; + +CONSTCD14 year_month_day operator+(const year_month_day& ymd, const months& dm) NOEXCEPT; +CONSTCD14 year_month_day operator+(const months& dm, const year_month_day& ymd) NOEXCEPT; +CONSTCD14 year_month_day operator-(const year_month_day& ymd, const months& dm) NOEXCEPT; +CONSTCD11 year_month_day operator+(const year_month_day& ymd, const years& dy) NOEXCEPT; +CONSTCD11 year_month_day operator+(const years& dy, const year_month_day& ymd) NOEXCEPT; +CONSTCD11 year_month_day operator-(const year_month_day& ymd, const years& dy) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const year_month_day& ymd); + +// year_month_day_last + +class year_month_day_last +{ + date::year y_; + date::month_day_last mdl_; + +public: + CONSTCD11 year_month_day_last(const date::year& y, + const date::month_day_last& mdl) NOEXCEPT; + + CONSTCD14 year_month_day_last& operator+=(const months& m) NOEXCEPT; + CONSTCD14 year_month_day_last& operator-=(const months& m) NOEXCEPT; + CONSTCD14 year_month_day_last& operator+=(const years& y) NOEXCEPT; + CONSTCD14 year_month_day_last& operator-=(const years& y) NOEXCEPT; + + CONSTCD11 date::year year() const NOEXCEPT; + CONSTCD11 date::month month() const NOEXCEPT; + CONSTCD11 date::month_day_last month_day_last() const NOEXCEPT; + CONSTCD14 date::day day() const NOEXCEPT; + + CONSTCD14 operator sys_days() const NOEXCEPT; + CONSTCD14 explicit operator local_days() const NOEXCEPT; + CONSTCD11 bool ok() const NOEXCEPT; +}; + +CONSTCD11 + bool operator==(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT; +CONSTCD11 + bool operator!=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT; +CONSTCD11 + bool operator< (const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT; +CONSTCD11 + bool operator> (const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT; +CONSTCD11 + bool operator<=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT; 
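The `year_month_day_last` type declared above is what makes "the last day of this month" a first-class value; converting it to `year_month_day` pins the concrete day. A short sketch under the same vendored-header assumption as earlier:

```cpp
#include <cassert>

#include "arrow/vendored/datetime/date.h"  // assumed include path

int main() {
  using namespace arrow::util::date;
  // year_month_day_last resolves the last day of the month lazily; the
  // conversion to year_month_day picks the concrete day.
  constexpr year_month_day_last feb2016{year{2016}, month_day_last{month{2}}};
  constexpr year_month_day ymd{feb2016};  // 2016-02-29 (leap year)
  static_assert(ymd.day() == day{29}, "2016 is a leap year");
  assert(year{2017}.is_leap() == false);
}
```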
+CONSTCD11 + bool operator>=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT; + +CONSTCD14 +year_month_day_last +operator+(const year_month_day_last& ymdl, const months& dm) NOEXCEPT; + +CONSTCD14 +year_month_day_last +operator+(const months& dm, const year_month_day_last& ymdl) NOEXCEPT; + +CONSTCD11 +year_month_day_last +operator+(const year_month_day_last& ymdl, const years& dy) NOEXCEPT; + +CONSTCD11 +year_month_day_last +operator+(const years& dy, const year_month_day_last& ymdl) NOEXCEPT; + +CONSTCD14 +year_month_day_last +operator-(const year_month_day_last& ymdl, const months& dm) NOEXCEPT; + +CONSTCD11 +year_month_day_last +operator-(const year_month_day_last& ymdl, const years& dy) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const year_month_day_last& ymdl); + +// year_month_weekday + +class year_month_weekday +{ + date::year y_; + date::month m_; + date::weekday_indexed wdi_; + +public: + year_month_weekday() = default; + CONSTCD11 year_month_weekday(const date::year& y, const date::month& m, + const date::weekday_indexed& wdi) NOEXCEPT; + CONSTCD14 year_month_weekday(const sys_days& dp) NOEXCEPT; + CONSTCD14 explicit year_month_weekday(const local_days& dp) NOEXCEPT; + + CONSTCD14 year_month_weekday& operator+=(const months& m) NOEXCEPT; + CONSTCD14 year_month_weekday& operator-=(const months& m) NOEXCEPT; + CONSTCD14 year_month_weekday& operator+=(const years& y) NOEXCEPT; + CONSTCD14 year_month_weekday& operator-=(const years& y) NOEXCEPT; + + CONSTCD11 date::year year() const NOEXCEPT; + CONSTCD11 date::month month() const NOEXCEPT; + CONSTCD11 date::weekday weekday() const NOEXCEPT; + CONSTCD11 unsigned index() const NOEXCEPT; + CONSTCD11 date::weekday_indexed weekday_indexed() const NOEXCEPT; + + CONSTCD14 operator sys_days() const NOEXCEPT; + CONSTCD14 explicit operator local_days() const NOEXCEPT; + CONSTCD14 bool ok() const NOEXCEPT; + +private: + static CONSTCD14 year_month_weekday from_days(days dp) NOEXCEPT; + CONSTCD14 days to_days() const NOEXCEPT; +}; + +CONSTCD11 + bool operator==(const year_month_weekday& x, const year_month_weekday& y) NOEXCEPT; +CONSTCD11 + bool operator!=(const year_month_weekday& x, const year_month_weekday& y) NOEXCEPT; + +CONSTCD14 +year_month_weekday +operator+(const year_month_weekday& ymwd, const months& dm) NOEXCEPT; + +CONSTCD14 +year_month_weekday +operator+(const months& dm, const year_month_weekday& ymwd) NOEXCEPT; + +CONSTCD11 +year_month_weekday +operator+(const year_month_weekday& ymwd, const years& dy) NOEXCEPT; + +CONSTCD11 +year_month_weekday +operator+(const years& dy, const year_month_weekday& ymwd) NOEXCEPT; + +CONSTCD14 +year_month_weekday +operator-(const year_month_weekday& ymwd, const months& dm) NOEXCEPT; + +CONSTCD11 +year_month_weekday +operator-(const year_month_weekday& ymwd, const years& dy) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const year_month_weekday& ymwdi); + +// year_month_weekday_last + +class year_month_weekday_last +{ + date::year y_; + date::month m_; + date::weekday_last wdl_; + +public: + CONSTCD11 year_month_weekday_last(const date::year& y, const date::month& m, + const date::weekday_last& wdl) NOEXCEPT; + + CONSTCD14 year_month_weekday_last& operator+=(const months& m) NOEXCEPT; + CONSTCD14 year_month_weekday_last& operator-=(const months& m) NOEXCEPT; + CONSTCD14 year_month_weekday_last& operator+=(const years& y) NOEXCEPT; + CONSTCD14 year_month_weekday_last& operator-=(const years& y) NOEXCEPT; + + 
CONSTCD11 date::year year() const NOEXCEPT; + CONSTCD11 date::month month() const NOEXCEPT; + CONSTCD11 date::weekday weekday() const NOEXCEPT; + CONSTCD11 date::weekday_last weekday_last() const NOEXCEPT; + + CONSTCD14 operator sys_days() const NOEXCEPT; + CONSTCD14 explicit operator local_days() const NOEXCEPT; + CONSTCD11 bool ok() const NOEXCEPT; + +private: + CONSTCD14 days to_days() const NOEXCEPT; +}; + +CONSTCD11 +bool +operator==(const year_month_weekday_last& x, const year_month_weekday_last& y) NOEXCEPT; + +CONSTCD11 +bool +operator!=(const year_month_weekday_last& x, const year_month_weekday_last& y) NOEXCEPT; + +CONSTCD14 +year_month_weekday_last +operator+(const year_month_weekday_last& ymwdl, const months& dm) NOEXCEPT; + +CONSTCD14 +year_month_weekday_last +operator+(const months& dm, const year_month_weekday_last& ymwdl) NOEXCEPT; + +CONSTCD11 +year_month_weekday_last +operator+(const year_month_weekday_last& ymwdl, const years& dy) NOEXCEPT; + +CONSTCD11 +year_month_weekday_last +operator+(const years& dy, const year_month_weekday_last& ymwdl) NOEXCEPT; + +CONSTCD14 +year_month_weekday_last +operator-(const year_month_weekday_last& ymwdl, const months& dm) NOEXCEPT; + +CONSTCD11 +year_month_weekday_last +operator-(const year_month_weekday_last& ymwdl, const years& dy) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const year_month_weekday_last& ymwdl); + +#if !defined(_MSC_VER) || (_MSC_VER >= 1900) +inline namespace literals +{ + +CONSTCD11 date::day operator "" _d(unsigned long long d) NOEXCEPT; +CONSTCD11 date::year operator "" _y(unsigned long long y) NOEXCEPT; + +// CONSTDATA date::month jan{1}; +// CONSTDATA date::month feb{2}; +// CONSTDATA date::month mar{3}; +// CONSTDATA date::month apr{4}; +// CONSTDATA date::month may{5}; +// CONSTDATA date::month jun{6}; +// CONSTDATA date::month jul{7}; +// CONSTDATA date::month aug{8}; +// CONSTDATA date::month sep{9}; +// CONSTDATA date::month oct{10}; +// CONSTDATA date::month nov{11}; +// CONSTDATA date::month dec{12}; +// +// CONSTDATA date::weekday sun{0u}; +// CONSTDATA date::weekday mon{1u}; +// CONSTDATA date::weekday tue{2u}; +// CONSTDATA date::weekday wed{3u}; +// CONSTDATA date::weekday thu{4u}; +// CONSTDATA date::weekday fri{5u}; +// CONSTDATA date::weekday sat{6u}; + +} // inline namespace literals +#endif // !defined(_MSC_VER) || (_MSC_VER >= 1900) + +#if HAS_VOID_T + +template > +struct is_clock + : std::false_type +{}; + +template +struct is_clock> + : std::true_type +{}; + +#endif // HAS_VOID_T + +//----------------+ +// Implementation | +//----------------+ + +// utilities +namespace detail { + +template> +class save_stream +{ + std::basic_ostream& os_; + CharT fill_; + std::ios::fmtflags flags_; + std::locale loc_; + +public: + ~save_stream() + { + os_.fill(fill_); + os_.flags(flags_); + os_.imbue(loc_); + } + + save_stream(const save_stream&) = delete; + save_stream& operator=(const save_stream&) = delete; + + explicit save_stream(std::basic_ostream& os) + : os_(os) + , fill_(os.fill()) + , flags_(os.flags()) + , loc_(os.getloc()) + {} +}; + +template +struct choose_trunc_type +{ + static const int digits = std::numeric_limits::digits; + using type = typename std::conditional + < + digits < 32, + std::int32_t, + typename std::conditional + < + digits < 64, + std::int64_t, +#ifdef __SIZEOF_INT128__ + __int128 +#else + std::int64_t +#endif + >::type + >::type; +}; + +template +CONSTCD11 +inline +typename std::enable_if +< + !std::chrono::treat_as_floating_point::value, + T 
+>::type +trunc(T t) NOEXCEPT +{ + return t; +} + +template +CONSTCD14 +inline +typename std::enable_if +< + std::chrono::treat_as_floating_point::value, + T +>::type +trunc(T t) NOEXCEPT +{ + using namespace std; + using I = typename choose_trunc_type::type; + CONSTDATA auto digits = numeric_limits::digits; + static_assert(digits < numeric_limits::digits, ""); + CONSTDATA auto max = I{1} << (digits-1); + CONSTDATA auto min = -max; + const auto negative = t < T{0}; + if (min <= t && t <= max && t != 0 && t == t) + { + t = static_cast(static_cast(t)); + if (t == 0 && negative) + t = -t; + } + return t; +} + +template +struct static_gcd +{ + static const std::intmax_t value = static_gcd::value; +}; + +template +struct static_gcd +{ + static const std::intmax_t value = Xp; +}; + +template <> +struct static_gcd<0, 0> +{ + static const std::intmax_t value = 1; +}; + +template +struct no_overflow +{ +private: + static const std::intmax_t gcd_n1_n2 = static_gcd::value; + static const std::intmax_t gcd_d1_d2 = static_gcd::value; + static const std::intmax_t n1 = R1::num / gcd_n1_n2; + static const std::intmax_t d1 = R1::den / gcd_d1_d2; + static const std::intmax_t n2 = R2::num / gcd_n1_n2; + static const std::intmax_t d2 = R2::den / gcd_d1_d2; + static const std::intmax_t max = -((std::intmax_t(1) << + (sizeof(std::intmax_t) * CHAR_BIT - 1)) + 1); + + template + struct mul // overflow == false + { + static const std::intmax_t value = Xp * Yp; + }; + + template + struct mul + { + static const std::intmax_t value = 1; + }; + +public: + static const bool value = (n1 <= max / d2) && (n2 <= max / d1); + typedef std::ratio::value, + mul::value> type; +}; + +} // detail + +// trunc towards zero +template +CONSTCD11 +inline +typename std::enable_if +< + detail::no_overflow::value, + To +>::type +trunc(const std::chrono::duration& d) +{ + return To{detail::trunc(std::chrono::duration_cast(d).count())}; +} + +template +CONSTCD11 +inline +typename std::enable_if +< + !detail::no_overflow::value, + To +>::type +trunc(const std::chrono::duration& d) +{ + using namespace std::chrono; + using rep = typename std::common_type::type; + return To{detail::trunc(duration_cast(duration_cast>(d)).count())}; +} + +#ifndef HAS_CHRONO_ROUNDING +# if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190023918 || (_MSC_FULL_VER >= 190000000 && defined (__clang__))) +# define HAS_CHRONO_ROUNDING 1 +# elif defined(__cpp_lib_chrono) && __cplusplus > 201402 && __cpp_lib_chrono >= 201510 +# define HAS_CHRONO_ROUNDING 1 +# elif defined(_LIBCPP_VERSION) && __cplusplus > 201402 && _LIBCPP_VERSION >= 3800 +# define HAS_CHRONO_ROUNDING 1 +# else +# define HAS_CHRONO_ROUNDING 0 +# endif +#endif // HAS_CHRONO_ROUNDING + +#if HAS_CHRONO_ROUNDING == 0 + +// round down +template +CONSTCD14 +inline +typename std::enable_if +< + detail::no_overflow::value, + To +>::type +floor(const std::chrono::duration& d) +{ + auto t = trunc(d); + if (t > d) + return t - To{1}; + return t; +} + +template +CONSTCD14 +inline +typename std::enable_if +< + !detail::no_overflow::value, + To +>::type +floor(const std::chrono::duration& d) +{ + using namespace std::chrono; + using rep = typename std::common_type::type; + return floor(floor>(d)); +} + +// round to nearest, to even on tie +template +CONSTCD14 +inline +To +round(const std::chrono::duration& d) +{ + auto t0 = floor(d); + auto t1 = t0 + To{1}; + if (t1 == To{0} && t0 < To{0}) + t1 = -t1; + auto diff0 = d - t0; + auto diff1 = t1 - d; + if (diff0 == diff1) + { + if (t0 - trunc(t0/2)*2 == To{0}) + return t0; 
+ return t1; + } + if (diff0 < diff1) + return t0; + return t1; +} + +// round up +template +CONSTCD14 +inline +To +ceil(const std::chrono::duration& d) +{ + auto t = trunc(d); + if (t < d) + return t + To{1}; + return t; +} + +template ::is_signed + >::type> +CONSTCD11 +std::chrono::duration +abs(std::chrono::duration d) +{ + return d >= d.zero() ? d : -d; +} + +// round down +template +CONSTCD11 +inline +std::chrono::time_point +floor(const std::chrono::time_point& tp) +{ + using std::chrono::time_point; + return time_point{date::floor(tp.time_since_epoch())}; +} + +// round to nearest, to even on tie +template +CONSTCD11 +inline +std::chrono::time_point +round(const std::chrono::time_point& tp) +{ + using std::chrono::time_point; + return time_point{round(tp.time_since_epoch())}; +} + +// round up +template +CONSTCD11 +inline +std::chrono::time_point +ceil(const std::chrono::time_point& tp) +{ + using std::chrono::time_point; + return time_point{ceil(tp.time_since_epoch())}; +} + +#else // HAS_CHRONO_ROUNDING == 1 + +using std::chrono::floor; +using std::chrono::ceil; +using std::chrono::round; +using std::chrono::abs; + +#endif // HAS_CHRONO_ROUNDING + +// trunc towards zero +template +CONSTCD11 +inline +std::chrono::time_point +trunc(const std::chrono::time_point& tp) +{ + using std::chrono::time_point; + return time_point{trunc(tp.time_since_epoch())}; +} + +// day + +CONSTCD11 inline day::day(unsigned d) NOEXCEPT : d_(static_cast(d)) {} +CONSTCD14 inline day& day::operator++() NOEXCEPT {++d_; return *this;} +CONSTCD14 inline day day::operator++(int) NOEXCEPT {auto tmp(*this); ++(*this); return tmp;} +CONSTCD14 inline day& day::operator--() NOEXCEPT {--d_; return *this;} +CONSTCD14 inline day day::operator--(int) NOEXCEPT {auto tmp(*this); --(*this); return tmp;} +CONSTCD14 inline day& day::operator+=(const days& d) NOEXCEPT {*this = *this + d; return *this;} +CONSTCD14 inline day& day::operator-=(const days& d) NOEXCEPT {*this = *this - d; return *this;} +CONSTCD11 inline day::operator unsigned() const NOEXCEPT {return d_;} +CONSTCD11 inline bool day::ok() const NOEXCEPT {return 1 <= d_ && d_ <= 31;} + +CONSTCD11 +inline +bool +operator==(const day& x, const day& y) NOEXCEPT +{ + return static_cast(x) == static_cast(y); +} + +CONSTCD11 +inline +bool +operator!=(const day& x, const day& y) NOEXCEPT +{ + return !(x == y); +} + +CONSTCD11 +inline +bool +operator<(const day& x, const day& y) NOEXCEPT +{ + return static_cast(x) < static_cast(y); +} + +CONSTCD11 +inline +bool +operator>(const day& x, const day& y) NOEXCEPT +{ + return y < x; +} + +CONSTCD11 +inline +bool +operator<=(const day& x, const day& y) NOEXCEPT +{ + return !(y < x); +} + +CONSTCD11 +inline +bool +operator>=(const day& x, const day& y) NOEXCEPT +{ + return !(x < y); +} + +CONSTCD11 +inline +days +operator-(const day& x, const day& y) NOEXCEPT +{ + return days{static_cast(static_cast(x) + - static_cast(y))}; +} + +CONSTCD11 +inline +day +operator+(const day& x, const days& y) NOEXCEPT +{ + return day{static_cast(x) + static_cast(y.count())}; +} + +CONSTCD11 +inline +day +operator+(const days& x, const day& y) NOEXCEPT +{ + return y + x; +} + +CONSTCD11 +inline +day +operator-(const day& x, const days& y) NOEXCEPT +{ + return x + -y; +} + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const day& d) +{ + detail::save_stream _(os); + os.fill('0'); + os.flags(std::ios::dec | std::ios::right); + os.width(2); + os << static_cast(d); + if (!d.ok()) + os << " is not a valid day"; + return os; +} 
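+
+// [Editor's note, not part of the upstream header] A minimal usage sketch for
+// the day type above: its operator+ performs no range checking, so an
+// out-of-range result is reported through ok() and the stream inserter
+// (the month and weekday overloads below, by contrast, wrap into range):
+//
+//     using namespace date;
+//     auto d = day{28} + days{4};    // day{32}; d.ok() == false
+//     std::cout << d << '\n';        // prints "32 is not a valid day"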
+
+// month
+
+CONSTCD11 inline month::month(unsigned m) NOEXCEPT : m_(static_cast<decltype(m_)>(m)) {}
+CONSTCD14 inline month& month::operator++() NOEXCEPT {*this += months{1}; return *this;}
+CONSTCD14 inline month month::operator++(int) NOEXCEPT {auto tmp(*this); ++(*this); return tmp;}
+CONSTCD14 inline month& month::operator--() NOEXCEPT {*this -= months{1}; return *this;}
+CONSTCD14 inline month month::operator--(int) NOEXCEPT {auto tmp(*this); --(*this); return tmp;}
+
+CONSTCD14
+inline
+month&
+month::operator+=(const months& m) NOEXCEPT
+{
+    *this = *this + m;
+    return *this;
+}
+
+CONSTCD14
+inline
+month&
+month::operator-=(const months& m) NOEXCEPT
+{
+    *this = *this - m;
+    return *this;
+}
+
+CONSTCD11 inline month::operator unsigned() const NOEXCEPT {return m_;}
+CONSTCD11 inline bool month::ok() const NOEXCEPT {return 1 <= m_ && m_ <= 12;}
+
+CONSTCD11
+inline
+bool
+operator==(const month& x, const month& y) NOEXCEPT
+{
+    return static_cast<unsigned>(x) == static_cast<unsigned>(y);
+}
+
+CONSTCD11
+inline
+bool
+operator!=(const month& x, const month& y) NOEXCEPT
+{
+    return !(x == y);
+}
+
+CONSTCD11
+inline
+bool
+operator<(const month& x, const month& y) NOEXCEPT
+{
+    return static_cast<unsigned>(x) < static_cast<unsigned>(y);
+}
+
+CONSTCD11
+inline
+bool
+operator>(const month& x, const month& y) NOEXCEPT
+{
+    return y < x;
+}
+
+CONSTCD11
+inline
+bool
+operator<=(const month& x, const month& y) NOEXCEPT
+{
+    return !(y < x);
+}
+
+CONSTCD11
+inline
+bool
+operator>=(const month& x, const month& y) NOEXCEPT
+{
+    return !(x < y);
+}
+
+CONSTCD14
+inline
+months
+operator-(const month& x, const month& y) NOEXCEPT
+{
+    auto const d = static_cast<unsigned>(x) - static_cast<unsigned>(y);
+    return months(d <= 11 ? d : d + 12);
+}
+
+CONSTCD14
+inline
+month
+operator+(const month& x, const months& y) NOEXCEPT
+{
+    auto const mu = static_cast<long long>(static_cast<unsigned>(x)) + (y.count() - 1);
+    auto const yr = (mu >= 0 ?
mu : mu-11) / 12; + return month{static_cast(mu - yr * 12 + 1)}; +} + +CONSTCD14 +inline +month +operator+(const months& x, const month& y) NOEXCEPT +{ + return y + x; +} + +CONSTCD14 +inline +month +operator-(const month& x, const months& y) NOEXCEPT +{ + return x + -y; +} + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const month& m) +{ + if (m.ok()) + { + CharT fmt[] = {'%', 'b', 0}; + os << format(os.getloc(), fmt, m); + } + else + os << static_cast(m) << " is not a valid month"; + return os; +} + +// year + +CONSTCD11 inline year::year(int y) NOEXCEPT : y_(static_cast(y)) {} +CONSTCD14 inline year& year::operator++() NOEXCEPT {++y_; return *this;} +CONSTCD14 inline year year::operator++(int) NOEXCEPT {auto tmp(*this); ++(*this); return tmp;} +CONSTCD14 inline year& year::operator--() NOEXCEPT {--y_; return *this;} +CONSTCD14 inline year year::operator--(int) NOEXCEPT {auto tmp(*this); --(*this); return tmp;} +CONSTCD14 inline year& year::operator+=(const years& y) NOEXCEPT {*this = *this + y; return *this;} +CONSTCD14 inline year& year::operator-=(const years& y) NOEXCEPT {*this = *this - y; return *this;} +CONSTCD11 inline year year::operator-() const NOEXCEPT {return year{-y_};} +CONSTCD11 inline year year::operator+() const NOEXCEPT {return *this;} + +CONSTCD11 +inline +bool +year::is_leap() const NOEXCEPT +{ + return y_ % 4 == 0 && (y_ % 100 != 0 || y_ % 400 == 0); +} + +CONSTCD11 inline year::operator int() const NOEXCEPT {return y_;} + +CONSTCD11 +inline +bool +year::ok() const NOEXCEPT +{ + return y_ != std::numeric_limits::min(); +} + +CONSTCD11 +inline +year +year::min() NOEXCEPT +{ + return year{-32767}; +} + +CONSTCD11 +inline +year +year::max() NOEXCEPT +{ + return year{32767}; +} + +CONSTCD11 +inline +bool +operator==(const year& x, const year& y) NOEXCEPT +{ + return static_cast(x) == static_cast(y); +} + +CONSTCD11 +inline +bool +operator!=(const year& x, const year& y) NOEXCEPT +{ + return !(x == y); +} + +CONSTCD11 +inline +bool +operator<(const year& x, const year& y) NOEXCEPT +{ + return static_cast(x) < static_cast(y); +} + +CONSTCD11 +inline +bool +operator>(const year& x, const year& y) NOEXCEPT +{ + return y < x; +} + +CONSTCD11 +inline +bool +operator<=(const year& x, const year& y) NOEXCEPT +{ + return !(y < x); +} + +CONSTCD11 +inline +bool +operator>=(const year& x, const year& y) NOEXCEPT +{ + return !(x < y); +} + +CONSTCD11 +inline +years +operator-(const year& x, const year& y) NOEXCEPT +{ + return years{static_cast(x) - static_cast(y)}; +} + +CONSTCD11 +inline +year +operator+(const year& x, const years& y) NOEXCEPT +{ + return year{static_cast(x) + y.count()}; +} + +CONSTCD11 +inline +year +operator+(const years& x, const year& y) NOEXCEPT +{ + return y + x; +} + +CONSTCD11 +inline +year +operator-(const year& x, const years& y) NOEXCEPT +{ + return year{static_cast(x) - y.count()}; +} + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const year& y) +{ + detail::save_stream _(os); + os.fill('0'); + os.flags(std::ios::dec | std::ios::internal); + os.width(4 + (y < year{0})); + os << static_cast(y); + if (!y.ok()) + os << " is not a valid year"; + return os; +} + +// weekday + +CONSTCD11 +inline +unsigned char +weekday::weekday_from_days(int z) NOEXCEPT +{ + return static_cast(static_cast( + z >= -4 ? 
+        (z+4) % 7 : (z+5) % 7 + 6));
+}
+
+CONSTCD11
+inline
+weekday::weekday(unsigned wd) NOEXCEPT
+    : wd_(static_cast<decltype(wd_)>(wd))
+    {}
+
+CONSTCD11
+inline
+weekday::weekday(const sys_days& dp) NOEXCEPT
+    : wd_(weekday_from_days(dp.time_since_epoch().count()))
+    {}
+
+CONSTCD11
+inline
+weekday::weekday(const local_days& dp) NOEXCEPT
+    : wd_(weekday_from_days(dp.time_since_epoch().count()))
+    {}
+
+CONSTCD14 inline weekday& weekday::operator++() NOEXCEPT {*this += days{1}; return *this;}
+CONSTCD14 inline weekday weekday::operator++(int) NOEXCEPT {auto tmp(*this); ++(*this); return tmp;}
+CONSTCD14 inline weekday& weekday::operator--() NOEXCEPT {*this -= days{1}; return *this;}
+CONSTCD14 inline weekday weekday::operator--(int) NOEXCEPT {auto tmp(*this); --(*this); return tmp;}
+
+CONSTCD14
+inline
+weekday&
+weekday::operator+=(const days& d) NOEXCEPT
+{
+    *this = *this + d;
+    return *this;
+}
+
+CONSTCD14
+inline
+weekday&
+weekday::operator-=(const days& d) NOEXCEPT
+{
+    *this = *this - d;
+    return *this;
+}
+
+CONSTCD11
+inline
+weekday::operator unsigned() const NOEXCEPT
+{
+    return static_cast<unsigned>(wd_);
+}
+
+CONSTCD11 inline bool weekday::ok() const NOEXCEPT {return wd_ <= 6;}
+
+CONSTCD11
+inline
+bool
+operator==(const weekday& x, const weekday& y) NOEXCEPT
+{
+    return static_cast<unsigned>(x) == static_cast<unsigned>(y);
+}
+
+CONSTCD11
+inline
+bool
+operator!=(const weekday& x, const weekday& y) NOEXCEPT
+{
+    return !(x == y);
+}
+
+CONSTCD14
+inline
+days
+operator-(const weekday& x, const weekday& y) NOEXCEPT
+{
+    auto const diff = static_cast<unsigned>(x) - static_cast<unsigned>(y);
+    return days{diff <= 6 ? diff : diff + 7};
+}
+
+CONSTCD14
+inline
+weekday
+operator+(const weekday& x, const days& y) NOEXCEPT
+{
+    auto const wdu = static_cast<long long>(static_cast<unsigned>(x)) + y.count();
+    auto const wk = (wdu >= 0 ?
wdu : wdu-6) / 7; + return weekday{static_cast(wdu - wk * 7)}; +} + +CONSTCD14 +inline +weekday +operator+(const days& x, const weekday& y) NOEXCEPT +{ + return y + x; +} + +CONSTCD14 +inline +weekday +operator-(const weekday& x, const days& y) NOEXCEPT +{ + return x + -y; +} + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const weekday& wd) +{ + if (wd.ok()) + { + CharT fmt[] = {'%', 'a', 0}; + os << format(fmt, wd); + } + else + os << static_cast(wd) << " is not a valid weekday"; + return os; +} + +#if !defined(_MSC_VER) || (_MSC_VER >= 1900) +inline namespace literals +{ + +CONSTCD11 +inline +date::day +operator "" _d(unsigned long long d) NOEXCEPT +{ + return date::day{static_cast(d)}; +} + +CONSTCD11 +inline +date::year +operator "" _y(unsigned long long y) NOEXCEPT +{ + return date::year(static_cast(y)); +} +#endif // !defined(_MSC_VER) || (_MSC_VER >= 1900) + +CONSTDATA date::last_spec last{}; + +CONSTDATA date::month jan{1}; +CONSTDATA date::month feb{2}; +CONSTDATA date::month mar{3}; +CONSTDATA date::month apr{4}; +CONSTDATA date::month may{5}; +CONSTDATA date::month jun{6}; +CONSTDATA date::month jul{7}; +CONSTDATA date::month aug{8}; +CONSTDATA date::month sep{9}; +CONSTDATA date::month oct{10}; +CONSTDATA date::month nov{11}; +CONSTDATA date::month dec{12}; + +CONSTDATA date::weekday sun{0u}; +CONSTDATA date::weekday mon{1u}; +CONSTDATA date::weekday tue{2u}; +CONSTDATA date::weekday wed{3u}; +CONSTDATA date::weekday thu{4u}; +CONSTDATA date::weekday fri{5u}; +CONSTDATA date::weekday sat{6u}; + +#if !defined(_MSC_VER) || (_MSC_VER >= 1900) +} // inline namespace literals +#endif + +CONSTDATA date::month January{1}; +CONSTDATA date::month February{2}; +CONSTDATA date::month March{3}; +CONSTDATA date::month April{4}; +CONSTDATA date::month May{5}; +CONSTDATA date::month June{6}; +CONSTDATA date::month July{7}; +CONSTDATA date::month August{8}; +CONSTDATA date::month September{9}; +CONSTDATA date::month October{10}; +CONSTDATA date::month November{11}; +CONSTDATA date::month December{12}; + +CONSTDATA date::weekday Sunday{0u}; +CONSTDATA date::weekday Monday{1u}; +CONSTDATA date::weekday Tuesday{2u}; +CONSTDATA date::weekday Wednesday{3u}; +CONSTDATA date::weekday Thursday{4u}; +CONSTDATA date::weekday Friday{5u}; +CONSTDATA date::weekday Saturday{6u}; + +// weekday_indexed + +CONSTCD11 +inline +weekday +weekday_indexed::weekday() const NOEXCEPT +{ + return date::weekday{static_cast(wd_)}; +} + +CONSTCD11 inline unsigned weekday_indexed::index() const NOEXCEPT {return index_;} + +CONSTCD11 +inline +bool +weekday_indexed::ok() const NOEXCEPT +{ + return weekday().ok() && 1 <= index_ && index_ <= 5; +} + +#ifdef __GNUC__ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wconversion" +#endif // __GNUC__ + +CONSTCD11 +inline +weekday_indexed::weekday_indexed(const date::weekday& wd, unsigned index) NOEXCEPT + : wd_(static_cast(static_cast(wd))) + , index_(static_cast(index)) + {} + +#ifdef __GNUC__ +# pragma GCC diagnostic pop +#endif // __GNUC__ + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const weekday_indexed& wdi) +{ + os << wdi.weekday() << '[' << wdi.index(); + if (!(1 <= wdi.index() && wdi.index() <= 5)) + os << " is not a valid index"; + os << ']'; + return os; +} + +CONSTCD11 +inline +weekday_indexed +weekday::operator[](unsigned index) const NOEXCEPT +{ + return {*this, index}; +} + +CONSTCD11 +inline +bool +operator==(const weekday_indexed& x, const weekday_indexed& y) NOEXCEPT +{ + return 
x.weekday() == y.weekday() && x.index() == y.index(); +} + +CONSTCD11 +inline +bool +operator!=(const weekday_indexed& x, const weekday_indexed& y) NOEXCEPT +{ + return !(x == y); +} + +// weekday_last + +CONSTCD11 inline date::weekday weekday_last::weekday() const NOEXCEPT {return wd_;} +CONSTCD11 inline bool weekday_last::ok() const NOEXCEPT {return wd_.ok();} +CONSTCD11 inline weekday_last::weekday_last(const date::weekday& wd) NOEXCEPT : wd_(wd) {} + +CONSTCD11 +inline +bool +operator==(const weekday_last& x, const weekday_last& y) NOEXCEPT +{ + return x.weekday() == y.weekday(); +} + +CONSTCD11 +inline +bool +operator!=(const weekday_last& x, const weekday_last& y) NOEXCEPT +{ + return !(x == y); +} + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const weekday_last& wdl) +{ + return os << wdl.weekday() << "[last]"; +} + +CONSTCD11 +inline +weekday_last +weekday::operator[](last_spec) const NOEXCEPT +{ + return weekday_last{*this}; +} + +// year_month + +CONSTCD11 +inline +year_month::year_month(const date::year& y, const date::month& m) NOEXCEPT + : y_(y) + , m_(m) + {} + +CONSTCD11 inline year year_month::year() const NOEXCEPT {return y_;} +CONSTCD11 inline month year_month::month() const NOEXCEPT {return m_;} +CONSTCD11 inline bool year_month::ok() const NOEXCEPT {return y_.ok() && m_.ok();} + +CONSTCD14 +inline +year_month& +year_month::operator+=(const months& dm) NOEXCEPT +{ + *this = *this + dm; + return *this; +} + +CONSTCD14 +inline +year_month& +year_month::operator-=(const months& dm) NOEXCEPT +{ + *this = *this - dm; + return *this; +} + +CONSTCD14 +inline +year_month& +year_month::operator+=(const years& dy) NOEXCEPT +{ + *this = *this + dy; + return *this; +} + +CONSTCD14 +inline +year_month& +year_month::operator-=(const years& dy) NOEXCEPT +{ + *this = *this - dy; + return *this; +} + +CONSTCD11 +inline +bool +operator==(const year_month& x, const year_month& y) NOEXCEPT +{ + return x.year() == y.year() && x.month() == y.month(); +} + +CONSTCD11 +inline +bool +operator!=(const year_month& x, const year_month& y) NOEXCEPT +{ + return !(x == y); +} + +CONSTCD11 +inline +bool +operator<(const year_month& x, const year_month& y) NOEXCEPT +{ + return x.year() < y.year() ? true + : (x.year() > y.year() ? false + : (x.month() < y.month())); +} + +CONSTCD11 +inline +bool +operator>(const year_month& x, const year_month& y) NOEXCEPT +{ + return y < x; +} + +CONSTCD11 +inline +bool +operator<=(const year_month& x, const year_month& y) NOEXCEPT +{ + return !(y < x); +} + +CONSTCD11 +inline +bool +operator>=(const year_month& x, const year_month& y) NOEXCEPT +{ + return !(x < y); +} + +CONSTCD14 +inline +year_month +operator+(const year_month& ym, const months& dm) NOEXCEPT +{ + auto dmi = static_cast(static_cast(ym.month())) - 1 + dm.count(); + auto dy = (dmi >= 0 ? 
dmi : dmi-11) / 12; + dmi = dmi - dy * 12 + 1; + return (ym.year() + years(dy)) / month(static_cast(dmi)); +} + +CONSTCD14 +inline +year_month +operator+(const months& dm, const year_month& ym) NOEXCEPT +{ + return ym + dm; +} + +CONSTCD14 +inline +year_month +operator-(const year_month& ym, const months& dm) NOEXCEPT +{ + return ym + -dm; +} + +CONSTCD11 +inline +months +operator-(const year_month& x, const year_month& y) NOEXCEPT +{ + return (x.year() - y.year()) + + months(static_cast(x.month()) - static_cast(y.month())); +} + +CONSTCD11 +inline +year_month +operator+(const year_month& ym, const years& dy) NOEXCEPT +{ + return (ym.year() + dy) / ym.month(); +} + +CONSTCD11 +inline +year_month +operator+(const years& dy, const year_month& ym) NOEXCEPT +{ + return ym + dy; +} + +CONSTCD11 +inline +year_month +operator-(const year_month& ym, const years& dy) NOEXCEPT +{ + return ym + -dy; +} + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const year_month& ym) +{ + return os << ym.year() << '/' << ym.month(); +} + +// month_day + +CONSTCD11 +inline +month_day::month_day(const date::month& m, const date::day& d) NOEXCEPT + : m_(m) + , d_(d) + {} + +CONSTCD11 inline date::month month_day::month() const NOEXCEPT {return m_;} +CONSTCD11 inline date::day month_day::day() const NOEXCEPT {return d_;} + +CONSTCD14 +inline +bool +month_day::ok() const NOEXCEPT +{ + CONSTDATA date::day d[] = + { + date::day(31), date::day(29), date::day(31), + date::day(30), date::day(31), date::day(30), + date::day(31), date::day(31), date::day(30), + date::day(31), date::day(30), date::day(31) + }; + return m_.ok() && date::day{1} <= d_ && d_ <= d[static_cast(m_)-1]; +} + +CONSTCD11 +inline +bool +operator==(const month_day& x, const month_day& y) NOEXCEPT +{ + return x.month() == y.month() && x.day() == y.day(); +} + +CONSTCD11 +inline +bool +operator!=(const month_day& x, const month_day& y) NOEXCEPT +{ + return !(x == y); +} + +CONSTCD11 +inline +bool +operator<(const month_day& x, const month_day& y) NOEXCEPT +{ + return x.month() < y.month() ? true + : (x.month() > y.month() ? 
false + : (x.day() < y.day())); +} + +CONSTCD11 +inline +bool +operator>(const month_day& x, const month_day& y) NOEXCEPT +{ + return y < x; +} + +CONSTCD11 +inline +bool +operator<=(const month_day& x, const month_day& y) NOEXCEPT +{ + return !(y < x); +} + +CONSTCD11 +inline +bool +operator>=(const month_day& x, const month_day& y) NOEXCEPT +{ + return !(x < y); +} + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const month_day& md) +{ + return os << md.month() << '/' << md.day(); +} + +// month_day_last + +CONSTCD11 inline month month_day_last::month() const NOEXCEPT {return m_;} +CONSTCD11 inline bool month_day_last::ok() const NOEXCEPT {return m_.ok();} +CONSTCD11 inline month_day_last::month_day_last(const date::month& m) NOEXCEPT : m_(m) {} + +CONSTCD11 +inline +bool +operator==(const month_day_last& x, const month_day_last& y) NOEXCEPT +{ + return x.month() == y.month(); +} + +CONSTCD11 +inline +bool +operator!=(const month_day_last& x, const month_day_last& y) NOEXCEPT +{ + return !(x == y); +} + +CONSTCD11 +inline +bool +operator<(const month_day_last& x, const month_day_last& y) NOEXCEPT +{ + return x.month() < y.month(); +} + +CONSTCD11 +inline +bool +operator>(const month_day_last& x, const month_day_last& y) NOEXCEPT +{ + return y < x; +} + +CONSTCD11 +inline +bool +operator<=(const month_day_last& x, const month_day_last& y) NOEXCEPT +{ + return !(y < x); +} + +CONSTCD11 +inline +bool +operator>=(const month_day_last& x, const month_day_last& y) NOEXCEPT +{ + return !(x < y); +} + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const month_day_last& mdl) +{ + return os << mdl.month() << "/last"; +} + +// month_weekday + +CONSTCD11 +inline +month_weekday::month_weekday(const date::month& m, + const date::weekday_indexed& wdi) NOEXCEPT + : m_(m) + , wdi_(wdi) + {} + +CONSTCD11 inline month month_weekday::month() const NOEXCEPT {return m_;} + +CONSTCD11 +inline +weekday_indexed +month_weekday::weekday_indexed() const NOEXCEPT +{ + return wdi_; +} + +CONSTCD11 +inline +bool +month_weekday::ok() const NOEXCEPT +{ + return m_.ok() && wdi_.ok(); +} + +CONSTCD11 +inline +bool +operator==(const month_weekday& x, const month_weekday& y) NOEXCEPT +{ + return x.month() == y.month() && x.weekday_indexed() == y.weekday_indexed(); +} + +CONSTCD11 +inline +bool +operator!=(const month_weekday& x, const month_weekday& y) NOEXCEPT +{ + return !(x == y); +} + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const month_weekday& mwd) +{ + return os << mwd.month() << '/' << mwd.weekday_indexed(); +} + +// month_weekday_last + +CONSTCD11 +inline +month_weekday_last::month_weekday_last(const date::month& m, + const date::weekday_last& wdl) NOEXCEPT + : m_(m) + , wdl_(wdl) + {} + +CONSTCD11 inline month month_weekday_last::month() const NOEXCEPT {return m_;} + +CONSTCD11 +inline +weekday_last +month_weekday_last::weekday_last() const NOEXCEPT +{ + return wdl_; +} + +CONSTCD11 +inline +bool +month_weekday_last::ok() const NOEXCEPT +{ + return m_.ok() && wdl_.ok(); +} + +CONSTCD11 +inline +bool +operator==(const month_weekday_last& x, const month_weekday_last& y) NOEXCEPT +{ + return x.month() == y.month() && x.weekday_last() == y.weekday_last(); +} + +CONSTCD11 +inline +bool +operator!=(const month_weekday_last& x, const month_weekday_last& y) NOEXCEPT +{ + return !(x == y); +} + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const month_weekday_last& mwdl) +{ + return os << mwdl.month() << 
'/' << mwdl.weekday_last(); +} + +// year_month_day_last + +CONSTCD11 +inline +year_month_day_last::year_month_day_last(const date::year& y, + const date::month_day_last& mdl) NOEXCEPT + : y_(y) + , mdl_(mdl) + {} + +CONSTCD14 +inline +year_month_day_last& +year_month_day_last::operator+=(const months& m) NOEXCEPT +{ + *this = *this + m; + return *this; +} + +CONSTCD14 +inline +year_month_day_last& +year_month_day_last::operator-=(const months& m) NOEXCEPT +{ + *this = *this - m; + return *this; +} + +CONSTCD14 +inline +year_month_day_last& +year_month_day_last::operator+=(const years& y) NOEXCEPT +{ + *this = *this + y; + return *this; +} + +CONSTCD14 +inline +year_month_day_last& +year_month_day_last::operator-=(const years& y) NOEXCEPT +{ + *this = *this - y; + return *this; +} + +CONSTCD11 inline year year_month_day_last::year() const NOEXCEPT {return y_;} +CONSTCD11 inline month year_month_day_last::month() const NOEXCEPT {return mdl_.month();} + +CONSTCD11 +inline +month_day_last +year_month_day_last::month_day_last() const NOEXCEPT +{ + return mdl_; +} + +CONSTCD14 +inline +day +year_month_day_last::day() const NOEXCEPT +{ + CONSTDATA date::day d[] = + { + date::day(31), date::day(28), date::day(31), + date::day(30), date::day(31), date::day(30), + date::day(31), date::day(31), date::day(30), + date::day(31), date::day(30), date::day(31) + }; + return month() != feb || !y_.is_leap() ? + d[static_cast(month()) - 1] : date::day{29}; +} + +CONSTCD14 +inline +year_month_day_last::operator sys_days() const NOEXCEPT +{ + return sys_days(year()/month()/day()); +} + +CONSTCD14 +inline +year_month_day_last::operator local_days() const NOEXCEPT +{ + return local_days(year()/month()/day()); +} + +CONSTCD11 +inline +bool +year_month_day_last::ok() const NOEXCEPT +{ + return y_.ok() && mdl_.ok(); +} + +CONSTCD11 +inline +bool +operator==(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT +{ + return x.year() == y.year() && x.month_day_last() == y.month_day_last(); +} + +CONSTCD11 +inline +bool +operator!=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT +{ + return !(x == y); +} + +CONSTCD11 +inline +bool +operator<(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT +{ + return x.year() < y.year() ? true + : (x.year() > y.year() ? 
false + : (x.month_day_last() < y.month_day_last())); +} + +CONSTCD11 +inline +bool +operator>(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT +{ + return y < x; +} + +CONSTCD11 +inline +bool +operator<=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT +{ + return !(y < x); +} + +CONSTCD11 +inline +bool +operator>=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT +{ + return !(x < y); +} + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const year_month_day_last& ymdl) +{ + return os << ymdl.year() << '/' << ymdl.month_day_last(); +} + +CONSTCD14 +inline +year_month_day_last +operator+(const year_month_day_last& ymdl, const months& dm) NOEXCEPT +{ + return (ymdl.year() / ymdl.month() + dm) / last; +} + +CONSTCD14 +inline +year_month_day_last +operator+(const months& dm, const year_month_day_last& ymdl) NOEXCEPT +{ + return ymdl + dm; +} + +CONSTCD14 +inline +year_month_day_last +operator-(const year_month_day_last& ymdl, const months& dm) NOEXCEPT +{ + return ymdl + (-dm); +} + +CONSTCD11 +inline +year_month_day_last +operator+(const year_month_day_last& ymdl, const years& dy) NOEXCEPT +{ + return {ymdl.year()+dy, ymdl.month_day_last()}; +} + +CONSTCD11 +inline +year_month_day_last +operator+(const years& dy, const year_month_day_last& ymdl) NOEXCEPT +{ + return ymdl + dy; +} + +CONSTCD11 +inline +year_month_day_last +operator-(const year_month_day_last& ymdl, const years& dy) NOEXCEPT +{ + return ymdl + (-dy); +} + +// year_month_day + +CONSTCD11 +inline +year_month_day::year_month_day(const date::year& y, const date::month& m, + const date::day& d) NOEXCEPT + : y_(y) + , m_(m) + , d_(d) + {} + +CONSTCD14 +inline +year_month_day::year_month_day(const year_month_day_last& ymdl) NOEXCEPT + : y_(ymdl.year()) + , m_(ymdl.month()) + , d_(ymdl.day()) + {} + +CONSTCD14 +inline +year_month_day::year_month_day(sys_days dp) NOEXCEPT + : year_month_day(from_days(dp.time_since_epoch())) + {} + +CONSTCD14 +inline +year_month_day::year_month_day(local_days dp) NOEXCEPT + : year_month_day(from_days(dp.time_since_epoch())) + {} + +CONSTCD11 inline year year_month_day::year() const NOEXCEPT {return y_;} +CONSTCD11 inline month year_month_day::month() const NOEXCEPT {return m_;} +CONSTCD11 inline day year_month_day::day() const NOEXCEPT {return d_;} + +CONSTCD14 +inline +year_month_day& +year_month_day::operator+=(const months& m) NOEXCEPT +{ + *this = *this + m; + return *this; +} + +CONSTCD14 +inline +year_month_day& +year_month_day::operator-=(const months& m) NOEXCEPT +{ + *this = *this - m; + return *this; +} + +CONSTCD14 +inline +year_month_day& +year_month_day::operator+=(const years& y) NOEXCEPT +{ + *this = *this + y; + return *this; +} + +CONSTCD14 +inline +year_month_day& +year_month_day::operator-=(const years& y) NOEXCEPT +{ + *this = *this - y; + return *this; +} + +CONSTCD14 +inline +days +year_month_day::to_days() const NOEXCEPT +{ + static_assert(std::numeric_limits::digits >= 18, + "This algorithm has not been ported to a 16 bit unsigned integer"); + static_assert(std::numeric_limits::digits >= 20, + "This algorithm has not been ported to a 16 bit signed integer"); + auto const y = static_cast(y_) - (m_ <= feb); + auto const m = static_cast(m_); + auto const d = static_cast(d_); + auto const era = (y >= 0 ? y : y-399) / 400; + auto const yoe = static_cast(y - era * 400); // [0, 399] + auto const doy = (153*(m > 2 ? 
m-3 : m+9) + 2)/5 + d-1; // [0, 365] + auto const doe = yoe * 365 + yoe/4 - yoe/100 + doy; // [0, 146096] + return days{era * 146097 + static_cast(doe) - 719468}; +} + +CONSTCD14 +inline +year_month_day::operator sys_days() const NOEXCEPT +{ + return sys_days{to_days()}; +} + +CONSTCD14 +inline +year_month_day::operator local_days() const NOEXCEPT +{ + return local_days{to_days()}; +} + +CONSTCD14 +inline +bool +year_month_day::ok() const NOEXCEPT +{ + if (!(y_.ok() && m_.ok())) + return false; + return date::day{1} <= d_ && d_ <= (y_ / m_ / last).day(); +} + +CONSTCD11 +inline +bool +operator==(const year_month_day& x, const year_month_day& y) NOEXCEPT +{ + return x.year() == y.year() && x.month() == y.month() && x.day() == y.day(); +} + +CONSTCD11 +inline +bool +operator!=(const year_month_day& x, const year_month_day& y) NOEXCEPT +{ + return !(x == y); +} + +CONSTCD11 +inline +bool +operator<(const year_month_day& x, const year_month_day& y) NOEXCEPT +{ + return x.year() < y.year() ? true + : (x.year() > y.year() ? false + : (x.month() < y.month() ? true + : (x.month() > y.month() ? false + : (x.day() < y.day())))); +} + +CONSTCD11 +inline +bool +operator>(const year_month_day& x, const year_month_day& y) NOEXCEPT +{ + return y < x; +} + +CONSTCD11 +inline +bool +operator<=(const year_month_day& x, const year_month_day& y) NOEXCEPT +{ + return !(y < x); +} + +CONSTCD11 +inline +bool +operator>=(const year_month_day& x, const year_month_day& y) NOEXCEPT +{ + return !(x < y); +} + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const year_month_day& ymd) +{ + detail::save_stream _(os); + os.fill('0'); + os.flags(std::ios::dec | std::ios::right); + os << ymd.year() << '-'; + os.width(2); + os << static_cast(ymd.month()) << '-'; + os << ymd.day(); + if (!ymd.ok()) + os << " is not a valid date"; + return os; +} + +CONSTCD14 +inline +year_month_day +year_month_day::from_days(days dp) NOEXCEPT +{ + static_assert(std::numeric_limits::digits >= 18, + "This algorithm has not been ported to a 16 bit unsigned integer"); + static_assert(std::numeric_limits::digits >= 20, + "This algorithm has not been ported to a 16 bit signed integer"); + auto const z = dp.count() + 719468; + auto const era = (z >= 0 ? z : z - 146096) / 146097; + auto const doe = static_cast(z - era * 146097); // [0, 146096] + auto const yoe = (doe - doe/1460 + doe/36524 - doe/146096) / 365; // [0, 399] + auto const y = static_cast(yoe) + era * 400; + auto const doy = doe - (365*yoe + yoe/4 - yoe/100); // [0, 365] + auto const mp = (5*doy + 2)/153; // [0, 11] + auto const d = doy - (153*mp+2)/5 + 1; // [1, 31] + auto const m = mp < 10 ? 
mp+3 : mp-9; // [1, 12] + return year_month_day{date::year{y + (m <= 2)}, date::month(m), date::day(d)}; +} + +CONSTCD14 +inline +year_month_day +operator+(const year_month_day& ymd, const months& dm) NOEXCEPT +{ + return (ymd.year() / ymd.month() + dm) / ymd.day(); +} + +CONSTCD14 +inline +year_month_day +operator+(const months& dm, const year_month_day& ymd) NOEXCEPT +{ + return ymd + dm; +} + +CONSTCD14 +inline +year_month_day +operator-(const year_month_day& ymd, const months& dm) NOEXCEPT +{ + return ymd + (-dm); +} + +CONSTCD11 +inline +year_month_day +operator+(const year_month_day& ymd, const years& dy) NOEXCEPT +{ + return (ymd.year() + dy) / ymd.month() / ymd.day(); +} + +CONSTCD11 +inline +year_month_day +operator+(const years& dy, const year_month_day& ymd) NOEXCEPT +{ + return ymd + dy; +} + +CONSTCD11 +inline +year_month_day +operator-(const year_month_day& ymd, const years& dy) NOEXCEPT +{ + return ymd + (-dy); +} + +// year_month_weekday + +CONSTCD11 +inline +year_month_weekday::year_month_weekday(const date::year& y, const date::month& m, + const date::weekday_indexed& wdi) + NOEXCEPT + : y_(y) + , m_(m) + , wdi_(wdi) + {} + +CONSTCD14 +inline +year_month_weekday::year_month_weekday(const sys_days& dp) NOEXCEPT + : year_month_weekday(from_days(dp.time_since_epoch())) + {} + +CONSTCD14 +inline +year_month_weekday::year_month_weekday(const local_days& dp) NOEXCEPT + : year_month_weekday(from_days(dp.time_since_epoch())) + {} + +CONSTCD14 +inline +year_month_weekday& +year_month_weekday::operator+=(const months& m) NOEXCEPT +{ + *this = *this + m; + return *this; +} + +CONSTCD14 +inline +year_month_weekday& +year_month_weekday::operator-=(const months& m) NOEXCEPT +{ + *this = *this - m; + return *this; +} + +CONSTCD14 +inline +year_month_weekday& +year_month_weekday::operator+=(const years& y) NOEXCEPT +{ + *this = *this + y; + return *this; +} + +CONSTCD14 +inline +year_month_weekday& +year_month_weekday::operator-=(const years& y) NOEXCEPT +{ + *this = *this - y; + return *this; +} + +CONSTCD11 inline year year_month_weekday::year() const NOEXCEPT {return y_;} +CONSTCD11 inline month year_month_weekday::month() const NOEXCEPT {return m_;} + +CONSTCD11 +inline +weekday +year_month_weekday::weekday() const NOEXCEPT +{ + return wdi_.weekday(); +} + +CONSTCD11 +inline +unsigned +year_month_weekday::index() const NOEXCEPT +{ + return wdi_.index(); +} + +CONSTCD11 +inline +weekday_indexed +year_month_weekday::weekday_indexed() const NOEXCEPT +{ + return wdi_; +} + +CONSTCD14 +inline +year_month_weekday::operator sys_days() const NOEXCEPT +{ + return sys_days{to_days()}; +} + +CONSTCD14 +inline +year_month_weekday::operator local_days() const NOEXCEPT +{ + return local_days{to_days()}; +} + +CONSTCD14 +inline +bool +year_month_weekday::ok() const NOEXCEPT +{ + if (!y_.ok() || !m_.ok() || !wdi_.weekday().ok() || wdi_.index() < 1) + return false; + if (wdi_.index() <= 4) + return true; + auto d2 = wdi_.weekday() - date::weekday(static_cast(y_/m_/1)) + days((wdi_.index()-1)*7 + 1); + return static_cast(d2.count()) <= static_cast((y_/m_/last).day()); +} + +CONSTCD14 +inline +year_month_weekday +year_month_weekday::from_days(days d) NOEXCEPT +{ + sys_days dp{d}; + auto const wd = date::weekday(dp); + auto const ymd = year_month_day(dp); + return {ymd.year(), ymd.month(), wd[(static_cast(ymd.day())-1)/7+1]}; +} + +CONSTCD14 +inline +days +year_month_weekday::to_days() const NOEXCEPT +{ + auto d = sys_days(y_/m_/1); + return (d + (wdi_.weekday() - date::weekday(d) + 
days{(wdi_.index()-1)*7}) + ).time_since_epoch(); +} + +CONSTCD11 +inline +bool +operator==(const year_month_weekday& x, const year_month_weekday& y) NOEXCEPT +{ + return x.year() == y.year() && x.month() == y.month() && + x.weekday_indexed() == y.weekday_indexed(); +} + +CONSTCD11 +inline +bool +operator!=(const year_month_weekday& x, const year_month_weekday& y) NOEXCEPT +{ + return !(x == y); +} + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const year_month_weekday& ymwdi) +{ + return os << ymwdi.year() << '/' << ymwdi.month() + << '/' << ymwdi.weekday_indexed(); +} + +CONSTCD14 +inline +year_month_weekday +operator+(const year_month_weekday& ymwd, const months& dm) NOEXCEPT +{ + return (ymwd.year() / ymwd.month() + dm) / ymwd.weekday_indexed(); +} + +CONSTCD14 +inline +year_month_weekday +operator+(const months& dm, const year_month_weekday& ymwd) NOEXCEPT +{ + return ymwd + dm; +} + +CONSTCD14 +inline +year_month_weekday +operator-(const year_month_weekday& ymwd, const months& dm) NOEXCEPT +{ + return ymwd + (-dm); +} + +CONSTCD11 +inline +year_month_weekday +operator+(const year_month_weekday& ymwd, const years& dy) NOEXCEPT +{ + return {ymwd.year()+dy, ymwd.month(), ymwd.weekday_indexed()}; +} + +CONSTCD11 +inline +year_month_weekday +operator+(const years& dy, const year_month_weekday& ymwd) NOEXCEPT +{ + return ymwd + dy; +} + +CONSTCD11 +inline +year_month_weekday +operator-(const year_month_weekday& ymwd, const years& dy) NOEXCEPT +{ + return ymwd + (-dy); +} + +// year_month_weekday_last + +CONSTCD11 +inline +year_month_weekday_last::year_month_weekday_last(const date::year& y, + const date::month& m, + const date::weekday_last& wdl) NOEXCEPT + : y_(y) + , m_(m) + , wdl_(wdl) + {} + +CONSTCD14 +inline +year_month_weekday_last& +year_month_weekday_last::operator+=(const months& m) NOEXCEPT +{ + *this = *this + m; + return *this; +} + +CONSTCD14 +inline +year_month_weekday_last& +year_month_weekday_last::operator-=(const months& m) NOEXCEPT +{ + *this = *this - m; + return *this; +} + +CONSTCD14 +inline +year_month_weekday_last& +year_month_weekday_last::operator+=(const years& y) NOEXCEPT +{ + *this = *this + y; + return *this; +} + +CONSTCD14 +inline +year_month_weekday_last& +year_month_weekday_last::operator-=(const years& y) NOEXCEPT +{ + *this = *this - y; + return *this; +} + +CONSTCD11 inline year year_month_weekday_last::year() const NOEXCEPT {return y_;} +CONSTCD11 inline month year_month_weekday_last::month() const NOEXCEPT {return m_;} + +CONSTCD11 +inline +weekday +year_month_weekday_last::weekday() const NOEXCEPT +{ + return wdl_.weekday(); +} + +CONSTCD11 +inline +weekday_last +year_month_weekday_last::weekday_last() const NOEXCEPT +{ + return wdl_; +} + +CONSTCD14 +inline +year_month_weekday_last::operator sys_days() const NOEXCEPT +{ + return sys_days{to_days()}; +} + +CONSTCD14 +inline +year_month_weekday_last::operator local_days() const NOEXCEPT +{ + return local_days{to_days()}; +} + +CONSTCD11 +inline +bool +year_month_weekday_last::ok() const NOEXCEPT +{ + return y_.ok() && m_.ok() && wdl_.ok(); +} + +CONSTCD14 +inline +days +year_month_weekday_last::to_days() const NOEXCEPT +{ + auto const d = sys_days(y_/m_/last); + return (d - (date::weekday{d} - wdl_.weekday())).time_since_epoch(); +} + +CONSTCD11 +inline +bool +operator==(const year_month_weekday_last& x, const year_month_weekday_last& y) NOEXCEPT +{ + return x.year() == y.year() && x.month() == y.month() && + x.weekday_last() == y.weekday_last(); +} + +CONSTCD11 +inline 
+bool +operator!=(const year_month_weekday_last& x, const year_month_weekday_last& y) NOEXCEPT +{ + return !(x == y); +} + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const year_month_weekday_last& ymwdl) +{ + return os << ymwdl.year() << '/' << ymwdl.month() << '/' << ymwdl.weekday_last(); +} + +CONSTCD14 +inline +year_month_weekday_last +operator+(const year_month_weekday_last& ymwdl, const months& dm) NOEXCEPT +{ + return (ymwdl.year() / ymwdl.month() + dm) / ymwdl.weekday_last(); +} + +CONSTCD14 +inline +year_month_weekday_last +operator+(const months& dm, const year_month_weekday_last& ymwdl) NOEXCEPT +{ + return ymwdl + dm; +} + +CONSTCD14 +inline +year_month_weekday_last +operator-(const year_month_weekday_last& ymwdl, const months& dm) NOEXCEPT +{ + return ymwdl + (-dm); +} + +CONSTCD11 +inline +year_month_weekday_last +operator+(const year_month_weekday_last& ymwdl, const years& dy) NOEXCEPT +{ + return {ymwdl.year()+dy, ymwdl.month(), ymwdl.weekday_last()}; +} + +CONSTCD11 +inline +year_month_weekday_last +operator+(const years& dy, const year_month_weekday_last& ymwdl) NOEXCEPT +{ + return ymwdl + dy; +} + +CONSTCD11 +inline +year_month_weekday_last +operator-(const year_month_weekday_last& ymwdl, const years& dy) NOEXCEPT +{ + return ymwdl + (-dy); +} + +// year_month from operator/() + +CONSTCD11 +inline +year_month +operator/(const year& y, const month& m) NOEXCEPT +{ + return {y, m}; +} + +CONSTCD11 +inline +year_month +operator/(const year& y, int m) NOEXCEPT +{ + return y / month(static_cast(m)); +} + +// month_day from operator/() + +CONSTCD11 +inline +month_day +operator/(const month& m, const day& d) NOEXCEPT +{ + return {m, d}; +} + +CONSTCD11 +inline +month_day +operator/(const day& d, const month& m) NOEXCEPT +{ + return m / d; +} + +CONSTCD11 +inline +month_day +operator/(const month& m, int d) NOEXCEPT +{ + return m / day(static_cast(d)); +} + +CONSTCD11 +inline +month_day +operator/(int m, const day& d) NOEXCEPT +{ + return month(static_cast(m)) / d; +} + +CONSTCD11 inline month_day operator/(const day& d, int m) NOEXCEPT {return m / d;} + +// month_day_last from operator/() + +CONSTCD11 +inline +month_day_last +operator/(const month& m, last_spec) NOEXCEPT +{ + return month_day_last{m}; +} + +CONSTCD11 +inline +month_day_last +operator/(last_spec, const month& m) NOEXCEPT +{ + return m/last; +} + +CONSTCD11 +inline +month_day_last +operator/(int m, last_spec) NOEXCEPT +{ + return month(static_cast(m))/last; +} + +CONSTCD11 +inline +month_day_last +operator/(last_spec, int m) NOEXCEPT +{ + return m/last; +} + +// month_weekday from operator/() + +CONSTCD11 +inline +month_weekday +operator/(const month& m, const weekday_indexed& wdi) NOEXCEPT +{ + return {m, wdi}; +} + +CONSTCD11 +inline +month_weekday +operator/(const weekday_indexed& wdi, const month& m) NOEXCEPT +{ + return m / wdi; +} + +CONSTCD11 +inline +month_weekday +operator/(int m, const weekday_indexed& wdi) NOEXCEPT +{ + return month(static_cast(m)) / wdi; +} + +CONSTCD11 +inline +month_weekday +operator/(const weekday_indexed& wdi, int m) NOEXCEPT +{ + return m / wdi; +} + +// month_weekday_last from operator/() + +CONSTCD11 +inline +month_weekday_last +operator/(const month& m, const weekday_last& wdl) NOEXCEPT +{ + return {m, wdl}; +} + +CONSTCD11 +inline +month_weekday_last +operator/(const weekday_last& wdl, const month& m) NOEXCEPT +{ + return m / wdl; +} + +CONSTCD11 +inline +month_weekday_last +operator/(int m, const weekday_last& wdl) NOEXCEPT +{ + return 
month(static_cast(m)) / wdl; +} + +CONSTCD11 +inline +month_weekday_last +operator/(const weekday_last& wdl, int m) NOEXCEPT +{ + return m / wdl; +} + +// year_month_day from operator/() + +CONSTCD11 +inline +year_month_day +operator/(const year_month& ym, const day& d) NOEXCEPT +{ + return {ym.year(), ym.month(), d}; +} + +CONSTCD11 +inline +year_month_day +operator/(const year_month& ym, int d) NOEXCEPT +{ + return ym / day(static_cast(d)); +} + +CONSTCD11 +inline +year_month_day +operator/(const year& y, const month_day& md) NOEXCEPT +{ + return y / md.month() / md.day(); +} + +CONSTCD11 +inline +year_month_day +operator/(int y, const month_day& md) NOEXCEPT +{ + return year(y) / md; +} + +CONSTCD11 +inline +year_month_day +operator/(const month_day& md, const year& y) NOEXCEPT +{ + return y / md; +} + +CONSTCD11 +inline +year_month_day +operator/(const month_day& md, int y) NOEXCEPT +{ + return year(y) / md; +} + +// year_month_day_last from operator/() + +CONSTCD11 +inline +year_month_day_last +operator/(const year_month& ym, last_spec) NOEXCEPT +{ + return {ym.year(), month_day_last{ym.month()}}; +} + +CONSTCD11 +inline +year_month_day_last +operator/(const year& y, const month_day_last& mdl) NOEXCEPT +{ + return {y, mdl}; +} + +CONSTCD11 +inline +year_month_day_last +operator/(int y, const month_day_last& mdl) NOEXCEPT +{ + return year(y) / mdl; +} + +CONSTCD11 +inline +year_month_day_last +operator/(const month_day_last& mdl, const year& y) NOEXCEPT +{ + return y / mdl; +} + +CONSTCD11 +inline +year_month_day_last +operator/(const month_day_last& mdl, int y) NOEXCEPT +{ + return year(y) / mdl; +} + +// year_month_weekday from operator/() + +CONSTCD11 +inline +year_month_weekday +operator/(const year_month& ym, const weekday_indexed& wdi) NOEXCEPT +{ + return {ym.year(), ym.month(), wdi}; +} + +CONSTCD11 +inline +year_month_weekday +operator/(const year& y, const month_weekday& mwd) NOEXCEPT +{ + return {y, mwd.month(), mwd.weekday_indexed()}; +} + +CONSTCD11 +inline +year_month_weekday +operator/(int y, const month_weekday& mwd) NOEXCEPT +{ + return year(y) / mwd; +} + +CONSTCD11 +inline +year_month_weekday +operator/(const month_weekday& mwd, const year& y) NOEXCEPT +{ + return y / mwd; +} + +CONSTCD11 +inline +year_month_weekday +operator/(const month_weekday& mwd, int y) NOEXCEPT +{ + return year(y) / mwd; +} + +// year_month_weekday_last from operator/() + +CONSTCD11 +inline +year_month_weekday_last +operator/(const year_month& ym, const weekday_last& wdl) NOEXCEPT +{ + return {ym.year(), ym.month(), wdl}; +} + +CONSTCD11 +inline +year_month_weekday_last +operator/(const year& y, const month_weekday_last& mwdl) NOEXCEPT +{ + return {y, mwdl.month(), mwdl.weekday_last()}; +} + +CONSTCD11 +inline +year_month_weekday_last +operator/(int y, const month_weekday_last& mwdl) NOEXCEPT +{ + return year(y) / mwdl; +} + +CONSTCD11 +inline +year_month_weekday_last +operator/(const month_weekday_last& mwdl, const year& y) NOEXCEPT +{ + return y / mwdl; +} + +CONSTCD11 +inline +year_month_weekday_last +operator/(const month_weekday_last& mwdl, int y) NOEXCEPT +{ + return year(y) / mwdl; +} + +template +struct fields; + +template +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, + const fields& fds, const std::string* abbrev = nullptr, + const std::chrono::seconds* offset_sec = nullptr); + +template +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, + fields& fds, std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr); + 
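+// [Editor's note, not part of the upstream header] The operator/ overloads
+// above allow the three conventional field orders for spelling a date;
+// assuming "using namespace date;" and the _y/_d literals defined earlier,
+// all of the following denote 2015-03-22:
+//
+//     auto a = 2015_y/mar/22;   // year / month / day
+//     auto b = mar/22/2015;     // month / day / year
+//     auto c = 22_d/mar/2015;   // day / month / year
+//     sys_days sd = a;          // convertible to days since 1970-01-01
+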
+// time_of_day + +enum {am = 1, pm}; + +namespace detail +{ + +// width::value is the number of fractional decimal digits in 1/n +// width<0>::value and width<1>::value are defined to be 0 +// If 1/n takes more than 18 fractional decimal digits, +// the result is truncated to 19. +// Example: width<2>::value == 1 +// Example: width<3>::value == 19 +// Example: width<4>::value == 2 +// Example: width<10>::value == 1 +// Example: width<1000>::value == 3 +template +struct width +{ + static CONSTDATA unsigned value = 1 + width::value; +}; + +template +struct width +{ + static CONSTDATA unsigned value = 0; +}; + +template +struct static_pow10 +{ +private: + static CONSTDATA std::uint64_t h = static_pow10::value; +public: + static CONSTDATA std::uint64_t value = h * h * (exp % 2 ? 10 : 1); +}; + +template <> +struct static_pow10<0> +{ + static CONSTDATA std::uint64_t value = 1; +}; + +template +struct make_precision +{ + using type = std::chrono::duration::value>>; + static CONSTDATA unsigned width = w; +}; + +template +struct make_precision +{ + using type = std::chrono::duration; + static CONSTDATA unsigned width = 6; +}; + +template ::type::period::den>::value> +class decimal_format_seconds +{ +public: + using rep = typename std::common_type::type::rep; + using precision = typename make_precision::type; + static auto CONSTDATA width = make_precision::width; + +private: + std::chrono::seconds s_; + precision sub_s_; + +public: + CONSTCD11 decimal_format_seconds() + : s_() + , sub_s_() + {} + + CONSTCD11 explicit decimal_format_seconds(const Duration& d) NOEXCEPT + : s_(std::chrono::duration_cast(d)) + , sub_s_(std::chrono::duration_cast(d - s_)) + {} + + CONSTCD14 std::chrono::seconds& seconds() NOEXCEPT {return s_;} + CONSTCD11 std::chrono::seconds seconds() const NOEXCEPT {return s_;} + CONSTCD11 precision subseconds() const NOEXCEPT {return sub_s_;} + + CONSTCD14 precision to_duration() const NOEXCEPT + { + return s_ + sub_s_; + } + + CONSTCD11 bool in_conventional_range() const NOEXCEPT + { + using namespace std::chrono; + return sub_s_ < std::chrono::seconds{1} && s_ < minutes{1}; + } + + template + friend + std::basic_ostream& + operator<<(std::basic_ostream& os, const decimal_format_seconds& x) + { + date::detail::save_stream _(os); + os.fill('0'); + os.flags(std::ios::dec | std::ios::right); + os.width(2); + os << x.s_.count() << + std::use_facet>(os.getloc()).decimal_point(); + os.width(width); + os << static_cast(x.sub_s_.count()); + return os; + } +}; + +template +class decimal_format_seconds +{ + static CONSTDATA unsigned w = 0; +public: + using rep = typename std::common_type::type::rep; + using precision = std::chrono::duration; + static auto CONSTDATA width = make_precision::width; +private: + + std::chrono::seconds s_; + +public: + CONSTCD11 decimal_format_seconds() : s_() {} + CONSTCD11 explicit decimal_format_seconds(const precision& s) NOEXCEPT + : s_(s) + {} + + CONSTCD14 std::chrono::seconds& seconds() NOEXCEPT {return s_;} + CONSTCD11 std::chrono::seconds seconds() const NOEXCEPT {return s_;} + CONSTCD14 precision to_duration() const NOEXCEPT {return s_;} + + CONSTCD11 bool in_conventional_range() const NOEXCEPT + { + using namespace std::chrono; + return s_ < minutes{1}; + } + + template + friend + std::basic_ostream& + operator<<(std::basic_ostream& os, const decimal_format_seconds& x) + { + date::detail::save_stream _(os); + os.fill('0'); + os.flags(std::ios::dec | std::ios::right); + os.width(2); + os << x.s_.count(); + return os; + } +}; + +enum class classify +{ + 
not_valid, + hour, + minute, + second, + subsecond +}; + +template +struct classify_duration +{ + static CONSTDATA classify value = + std::is_convertible::value + ? classify::hour : + std::is_convertible::value + ? classify::minute : + std::is_convertible::value + ? classify::second : + std::chrono::treat_as_floating_point::value + ? classify::not_valid : + classify::subsecond; +}; + +template +inline +CONSTCD11 +typename std::enable_if + < + std::numeric_limits::is_signed, + std::chrono::duration + >::type +abs(std::chrono::duration d) +{ + return d >= d.zero() ? d : -d; +} + +template +inline +CONSTCD11 +typename std::enable_if + < + !std::numeric_limits::is_signed, + std::chrono::duration + >::type +abs(std::chrono::duration d) +{ + return d; +} + +class time_of_day_base +{ +protected: + std::chrono::hours h_; + unsigned char mode_; + bool neg_; + + enum {is24hr}; + + CONSTCD11 time_of_day_base() NOEXCEPT + : h_(0) + , mode_(static_cast(is24hr)) + , neg_(false) + {} + + + CONSTCD11 time_of_day_base(std::chrono::hours h, bool neg, unsigned m) NOEXCEPT + : h_(detail::abs(h)) + , mode_(static_cast(m)) + , neg_(neg) + {} + + CONSTCD14 void make24() NOEXCEPT; + CONSTCD14 void make12() NOEXCEPT; + + CONSTCD14 std::chrono::hours to24hr() const; + + CONSTCD11 bool in_conventional_range() const NOEXCEPT + { + return !neg_ && h_ < days{1}; + } +}; + +CONSTCD14 +inline +std::chrono::hours +time_of_day_base::to24hr() const +{ + auto h = h_; + if (mode_ == am || mode_ == pm) + { + CONSTDATA auto h12 = std::chrono::hours(12); + if (mode_ == pm) + { + if (h != h12) + h = h + h12; + } + else if (h == h12) + h = std::chrono::hours(0); + } + return h; +} + +CONSTCD14 +inline +void +time_of_day_base::make24() NOEXCEPT +{ + h_ = to24hr(); + mode_ = is24hr; +} + +CONSTCD14 +inline +void +time_of_day_base::make12() NOEXCEPT +{ + if (mode_ == is24hr) + { + CONSTDATA auto h12 = std::chrono::hours(12); + if (h_ >= h12) + { + if (h_ > h12) + h_ = h_ - h12; + mode_ = pm; + } + else + { + if (h_ == std::chrono::hours(0)) + h_ = h12; + mode_ = am; + } + } +} + +template ::value> +class time_of_day_storage; + +template +class time_of_day_storage, detail::classify::hour> + : private detail::time_of_day_base +{ + using base = detail::time_of_day_base; + +public: + using precision = std::chrono::hours; + +#if !defined(_MSC_VER) || _MSC_VER >= 1900 + CONSTCD11 time_of_day_storage() NOEXCEPT = default; +#else + CONSTCD11 time_of_day_storage() = default; +#endif /* !defined(_MSC_VER) || _MSC_VER >= 1900 */ + + CONSTCD11 explicit time_of_day_storage(std::chrono::hours since_midnight) NOEXCEPT + : base(since_midnight, since_midnight < std::chrono::hours{0}, is24hr) + {} + + CONSTCD11 explicit time_of_day_storage(std::chrono::hours h, unsigned md) NOEXCEPT + : base(h, h < std::chrono::hours{0}, md) + {} + + CONSTCD11 std::chrono::hours hours() const NOEXCEPT {return h_;} + CONSTCD11 unsigned mode() const NOEXCEPT {return mode_;} + + CONSTCD14 explicit operator precision() const NOEXCEPT + { + auto p = to24hr(); + if (neg_) + p = -p; + return p; + } + + CONSTCD14 precision to_duration() const NOEXCEPT + { + return static_cast(*this); + } + + CONSTCD14 time_of_day_storage& make24() NOEXCEPT {base::make24(); return *this;} + CONSTCD14 time_of_day_storage& make12() NOEXCEPT {base::make12(); return *this;} + + CONSTCD11 bool in_conventional_range() const NOEXCEPT + { + return base::in_conventional_range(); + } + + template + friend + std::basic_ostream& + operator<<(std::basic_ostream& os, const time_of_day_storage& t) + { + 
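+        // [Editor's note] writes an optional '-' sign, then the hour count
+        // (zero-padded to two digits in 24-hour mode); 24-hour mode appends
+        // "00" for the minutes field while am/pm modes append the suffix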
using namespace std; + detail::save_stream _(os); + if (t.neg_) + os << '-'; + os.fill('0'); + os.flags(std::ios::dec | std::ios::right); + if (t.mode_ != am && t.mode_ != pm) + os.width(2); + os << t.h_.count(); + switch (t.mode_) + { + case time_of_day_storage::is24hr: + os << "00"; + break; + case am: + os << "am"; + break; + case pm: + os << "pm"; + break; + } + return os; + } +}; + +template +class time_of_day_storage, detail::classify::minute> + : private detail::time_of_day_base +{ + using base = detail::time_of_day_base; + + std::chrono::minutes m_; + +public: + using precision = std::chrono::minutes; + + CONSTCD11 time_of_day_storage() NOEXCEPT + : base() + , m_(0) + {} + + CONSTCD11 explicit time_of_day_storage(std::chrono::minutes since_midnight) NOEXCEPT + : base(std::chrono::duration_cast(since_midnight), + since_midnight < std::chrono::minutes{0}, is24hr) + , m_(detail::abs(since_midnight) - h_) + {} + + CONSTCD11 explicit time_of_day_storage(std::chrono::hours h, std::chrono::minutes m, + unsigned md) NOEXCEPT + : base(h, false, md) + , m_(m) + {} + + CONSTCD11 std::chrono::hours hours() const NOEXCEPT {return h_;} + CONSTCD11 std::chrono::minutes minutes() const NOEXCEPT {return m_;} + CONSTCD11 unsigned mode() const NOEXCEPT {return mode_;} + + CONSTCD14 explicit operator precision() const NOEXCEPT + { + auto p = to24hr() + m_; + if (neg_) + p = -p; + return p; + } + + CONSTCD14 precision to_duration() const NOEXCEPT + { + return static_cast(*this); + } + + CONSTCD14 time_of_day_storage& make24() NOEXCEPT {base::make24(); return *this;} + CONSTCD14 time_of_day_storage& make12() NOEXCEPT {base::make12(); return *this;} + + CONSTCD11 bool in_conventional_range() const NOEXCEPT + { + return base::in_conventional_range() && m_ < std::chrono::hours{1}; + } + + template + friend + std::basic_ostream& + operator<<(std::basic_ostream& os, const time_of_day_storage& t) + { + using namespace std; + detail::save_stream _(os); + if (t.neg_) + os << '-'; + os.fill('0'); + os.flags(std::ios::dec | std::ios::right); + if (t.mode_ != am && t.mode_ != pm) + os.width(2); + os << t.h_.count() << ':'; + os.width(2); + os << t.m_.count(); + switch (t.mode_) + { + case am: + os << "am"; + break; + case pm: + os << "pm"; + break; + } + return os; + } +}; + +template +class time_of_day_storage, detail::classify::second> + : private detail::time_of_day_base +{ + using base = detail::time_of_day_base; + using dfs = decimal_format_seconds; + + std::chrono::minutes m_; + dfs s_; + +public: + using precision = std::chrono::seconds; + + CONSTCD11 time_of_day_storage() NOEXCEPT + : base() + , m_(0) + , s_() + {} + + CONSTCD11 explicit time_of_day_storage(std::chrono::seconds since_midnight) NOEXCEPT + : base(std::chrono::duration_cast(since_midnight), + since_midnight < std::chrono::seconds{0}, is24hr) + , m_(std::chrono::duration_cast(detail::abs(since_midnight) - h_)) + , s_(detail::abs(since_midnight) - h_ - m_) + {} + + CONSTCD11 explicit time_of_day_storage(std::chrono::hours h, std::chrono::minutes m, + std::chrono::seconds s, unsigned md) NOEXCEPT + : base(h, false, md) + , m_(m) + , s_(s) + {} + + CONSTCD11 std::chrono::hours hours() const NOEXCEPT {return h_;} + CONSTCD11 std::chrono::minutes minutes() const NOEXCEPT {return m_;} + CONSTCD14 std::chrono::seconds& seconds() NOEXCEPT {return s_.seconds();} + CONSTCD11 std::chrono::seconds seconds() const NOEXCEPT {return s_.seconds();} + CONSTCD11 unsigned mode() const NOEXCEPT {return mode_;} + + CONSTCD14 explicit operator precision() const 
NOEXCEPT + { + auto p = to24hr() + s_.to_duration() + m_; + if (neg_) + p = -p; + return p; + } + + CONSTCD14 precision to_duration() const NOEXCEPT + { + return static_cast(*this); + } + + CONSTCD14 time_of_day_storage& make24() NOEXCEPT {base::make24(); return *this;} + CONSTCD14 time_of_day_storage& make12() NOEXCEPT {base::make12(); return *this;} + + CONSTCD11 bool in_conventional_range() const NOEXCEPT + { + return base::in_conventional_range() && m_ < std::chrono::hours{1} && + s_.in_conventional_range(); + } + + template + friend + std::basic_ostream& + operator<<(std::basic_ostream& os, const time_of_day_storage& t) + { + using namespace std; + detail::save_stream _(os); + if (t.neg_) + os << '-'; + os.fill('0'); + os.flags(std::ios::dec | std::ios::right); + if (t.mode_ != am && t.mode_ != pm) + os.width(2); + os << t.h_.count() << ':'; + os.width(2); + os << t.m_.count() << ':' << t.s_; + switch (t.mode_) + { + case am: + os << "am"; + break; + case pm: + os << "pm"; + break; + } + return os; + } + + template + friend + std::basic_ostream& + date::to_stream(std::basic_ostream& os, const CharT* fmt, + const fields& fds, const std::string* abbrev, + const std::chrono::seconds* offset_sec); + + template + friend + std::basic_istream& + date::from_stream(std::basic_istream& is, const CharT* fmt, + fields& fds, + std::basic_string* abbrev, std::chrono::minutes* offset); +}; + +template +class time_of_day_storage, detail::classify::subsecond> + : private detail::time_of_day_base +{ +public: + using Duration = std::chrono::duration; + using dfs = decimal_format_seconds::type>; + using precision = typename dfs::precision; + +private: + using base = detail::time_of_day_base; + + std::chrono::minutes m_; + dfs s_; + +public: + CONSTCD11 time_of_day_storage() NOEXCEPT + : base() + , m_(0) + , s_() + {} + + CONSTCD11 explicit time_of_day_storage(Duration since_midnight) NOEXCEPT + : base(date::trunc(since_midnight), + since_midnight < Duration{0}, is24hr) + , m_(date::trunc(detail::abs(since_midnight) - h_)) + , s_(detail::abs(since_midnight) - h_ - m_) + {} + + CONSTCD11 explicit time_of_day_storage(std::chrono::hours h, std::chrono::minutes m, + std::chrono::seconds s, precision sub_s, + unsigned md) NOEXCEPT + : base(h, false, md) + , m_(m) + , s_(s + sub_s) + {} + + CONSTCD11 std::chrono::hours hours() const NOEXCEPT {return h_;} + CONSTCD11 std::chrono::minutes minutes() const NOEXCEPT {return m_;} + CONSTCD14 std::chrono::seconds& seconds() NOEXCEPT {return s_.seconds();} + CONSTCD11 std::chrono::seconds seconds() const NOEXCEPT {return s_.seconds();} + CONSTCD11 precision subseconds() const NOEXCEPT {return s_.subseconds();} + CONSTCD11 unsigned mode() const NOEXCEPT {return mode_;} + + CONSTCD14 explicit operator precision() const NOEXCEPT + { + auto p = to24hr() + s_.to_duration() + m_; + if (neg_) + p = -p; + return p; + } + + CONSTCD14 precision to_duration() const NOEXCEPT + { + return static_cast(*this); + } + + CONSTCD14 time_of_day_storage& make24() NOEXCEPT {base::make24(); return *this;} + CONSTCD14 time_of_day_storage& make12() NOEXCEPT {base::make12(); return *this;} + + CONSTCD11 bool in_conventional_range() const NOEXCEPT + { + return base::in_conventional_range() && m_ < std::chrono::hours{1} && + s_.in_conventional_range(); + } + + template + friend + std::basic_ostream& + operator<<(std::basic_ostream& os, const time_of_day_storage& t) + { + using namespace std; + detail::save_stream _(os); + if (t.neg_) + os << '-'; + os.fill('0'); + os.flags(std::ios::dec | 
std::ios::right); + if (t.mode_ != am && t.mode_ != pm) + os.width(2); + os << t.h_.count() << ':'; + os.width(2); + os << t.m_.count() << ':' << t.s_; + switch (t.mode_) + { + case am: + os << "am"; + break; + case pm: + os << "pm"; + break; + } + return os; + } + + template + friend + std::basic_ostream& + date::to_stream(std::basic_ostream& os, const CharT* fmt, + const fields& fds, const std::string* abbrev, + const std::chrono::seconds* offset_sec); + + template + friend + std::basic_istream& + date::from_stream(std::basic_istream& is, const CharT* fmt, + fields& fds, + std::basic_string* abbrev, std::chrono::minutes* offset); +}; + +} // namespace detail + +template +class time_of_day + : public detail::time_of_day_storage +{ + using base = detail::time_of_day_storage; +public: +#if !defined(_MSC_VER) || _MSC_VER >= 1900 + CONSTCD11 time_of_day() NOEXCEPT = default; +#else + CONSTCD11 time_of_day() = default; +#endif /* !defined(_MSC_VER) || _MSC_VER >= 1900 */ + + CONSTCD11 explicit time_of_day(Duration since_midnight) NOEXCEPT + : base(since_midnight) + {} + + template + CONSTCD11 + explicit time_of_day(Arg0&& arg0, Arg1&& arg1, Args&& ...args) NOEXCEPT + : base(std::forward(arg0), std::forward(arg1), std::forward(args)...) + {} +}; + +template ::value>::type> +CONSTCD11 +inline +time_of_day> +make_time(const std::chrono::duration& d) +{ + return time_of_day>(d); +} + +CONSTCD11 +inline +time_of_day +make_time(const std::chrono::hours& h, unsigned md) +{ + return time_of_day(h, md); +} + +CONSTCD11 +inline +time_of_day +make_time(const std::chrono::hours& h, const std::chrono::minutes& m, + unsigned md) +{ + return time_of_day(h, m, md); +} + +CONSTCD11 +inline +time_of_day +make_time(const std::chrono::hours& h, const std::chrono::minutes& m, + const std::chrono::seconds& s, unsigned md) +{ + return time_of_day(h, m, s, md); +} + +template >::value>::type> +CONSTCD11 +inline +time_of_day> +make_time(const std::chrono::hours& h, const std::chrono::minutes& m, + const std::chrono::seconds& s, const std::chrono::duration& sub_s, + unsigned md) +{ + return time_of_day>(h, m, s, sub_s, md); +} + +template +inline +typename std::enable_if +< + !std::chrono::treat_as_floating_point::value && + std::ratio_less::value + , std::basic_ostream& +>::type +operator<<(std::basic_ostream& os, const sys_time& tp) +{ + auto const dp = date::floor(tp); + return os << year_month_day(dp) << ' ' << make_time(tp-dp); +} + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const sys_days& dp) +{ + return os << year_month_day(dp); +} + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const local_time& ut) +{ + return (os << sys_time{ut.time_since_epoch()}); +} + +// to_stream + +template +struct fields +{ + year_month_day ymd{year{0}/0/0}; + weekday wd{7u}; + time_of_day tod{}; + + fields() = default; + + fields(year_month_day ymd_) : ymd(ymd_) {} + fields(weekday wd_) : wd(wd_) {} + fields(time_of_day tod_) : tod(tod_) {} + + fields(year_month_day ymd_, weekday wd_) : ymd(ymd_), wd(wd_) {} + fields(year_month_day ymd_, time_of_day tod_) : ymd(ymd_), tod(tod_) {} + + fields(weekday wd_, time_of_day tod_) : wd(wd_), tod(tod_) {} + + fields(year_month_day ymd_, weekday wd_, time_of_day tod_) + : ymd(ymd_) + , wd(wd_) + , tod(tod_) + {} +}; + +namespace detail +{ + +template +unsigned +extract_weekday(std::basic_ostream& os, const fields& fds) +{ + if (!fds.ymd.ok() && !fds.wd.ok()) + { + // fds does not contain a valid weekday + 
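// weekday values are 0-6, so 7 is returned as an out-of-range failure sentinel + 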
os.setstate(std::ios::failbit); + return 7; + } + unsigned wd; + if (fds.ymd.ok()) + { + wd = static_cast(weekday{fds.ymd}); + if (fds.wd.ok() && wd != static_cast(fds.wd)) + { + // fds.ymd and fds.wd are inconsistent + os.setstate(std::ios::failbit); + return 7; + } + } + else + wd = static_cast(fds.wd); + return wd; +} + +template +unsigned +extract_month(std::basic_ostream& os, const fields& fds) +{ + if (!fds.ymd.month().ok()) + { + // fds does not contain a valid month + os.setstate(std::ios::failbit); + return 0; + } + return static_cast(fds.ymd.month()); +} + +} // namespace detail + +#if ONLY_C_LOCALE + +namespace detail +{ + +inline +std::pair +weekday_names() +{ + using namespace std; + static const string nm[] = + { + "Sunday", + "Monday", + "Tuesday", + "Wednesday", + "Thursday", + "Friday", + "Saturday", + "Sun", + "Mon", + "Tue", + "Wed", + "Thu", + "Fri", + "Sat" + }; + return make_pair(nm, nm+sizeof(nm)/sizeof(nm[0])); +} + +inline +std::pair +month_names() +{ + using namespace std; + static const string nm[] = + { + "January", + "February", + "March", + "April", + "May", + "June", + "July", + "August", + "September", + "October", + "November", + "December", + "Jan", + "Feb", + "Mar", + "Apr", + "May", + "Jun", + "Jul", + "Aug", + "Sep", + "Oct", + "Nov", + "Dec" + }; + return make_pair(nm, nm+sizeof(nm)/sizeof(nm[0])); +} + +inline +std::pair +ampm_names() +{ + using namespace std; + static const string nm[] = + { + "AM", + "PM" + }; + return make_pair(nm, nm+sizeof(nm)/sizeof(nm[0])); +} + +template +FwdIter +scan_keyword(std::basic_istream& is, FwdIter kb, FwdIter ke) +{ + using namespace std; + size_t nkw = static_cast(std::distance(kb, ke)); + const unsigned char doesnt_match = '\0'; + const unsigned char might_match = '\1'; + const unsigned char does_match = '\2'; + unsigned char statbuf[100]; + unsigned char* status = statbuf; + unique_ptr stat_hold(0, free); + if (nkw > sizeof(statbuf)) + { + status = (unsigned char*)malloc(nkw); + if (status == nullptr) + throw bad_alloc(); + stat_hold.reset(status); + } + size_t n_might_match = nkw; // At this point, any keyword might match + size_t n_does_match = 0; // but none of them definitely do + // Initialize all statuses to might_match, except for "" keywords, which are does_match + unsigned char* st = status; + for (auto ky = kb; ky != ke; ++ky, ++st) + { + if (!ky->empty()) + *st = might_match; + else + { + *st = does_match; + --n_might_match; + ++n_does_match; + } + } + // While there might be a match, test keywords against the next CharT + for (size_t indx = 0; is && n_might_match > 0; ++indx) + { + // Peek at the next CharT but don't consume it + auto ic = is.peek(); + if (ic == EOF) + { + is.setstate(ios::eofbit); + break; + } + auto c = static_cast(toupper(ic)); + bool consume = false; + // For each keyword which might match, see if the indx character is c + // If a match is found, consume c + // If a match is found, and that is the last character in the keyword, + // then that keyword matches. 
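+ // (Illustration: scanning "Tue" against the weekday names above, after 'T' only + // the Tuesday/Thursday forms might match; after 'u' only "Tuesday" and "Tue" + // survive; "Tue" is marked does_match once its final 'e' is consumed.)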
+ // If the keyword doesn't match this character, then change the keyword + // to doesn't match + st = status; + for (auto ky = kb; ky != ke; ++ky, ++st) + { + if (*st == might_match) + { + if (c == static_cast(toupper((*ky)[indx]))) + { + consume = true; + if (ky->size() == indx+1) + { + *st = does_match; + --n_might_match; + ++n_does_match; + } + } + else + { + *st = doesnt_match; + --n_might_match; + } + } + } + // consume if we matched a character + if (consume) + { + (void)is.get(); + // If we consumed a character and there might be a matched keyword that + // was marked matched on a previous iteration, then such keywords + // are now marked as not matching. + if (n_might_match + n_does_match > 1) + { + st = status; + for (auto ky = kb; ky != ke; ++ky, ++st) + { + if (*st == does_match && ky->size() != indx+1) + { + *st = doesnt_match; + --n_does_match; + } + } + } + } + } + // We've exited the loop because we hit eof and/or we have no more "might matches". + // Return the first matching result + for (st = status; kb != ke; ++kb, ++st) + if (*st == does_match) + break; + if (kb == ke) + is.setstate(ios_base::failbit); + return kb; +} + +} // namespace detail + +#endif // ONLY_C_LOCALE + +template +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, + const fields& fds, const std::string* abbrev, + const std::chrono::seconds* offset_sec) +{ + using namespace std; + using namespace std::chrono; + using namespace detail; + tm tm{}; +#if !ONLY_C_LOCALE + auto& facet = use_facet>(os.getloc()); +#endif + const CharT* command = nullptr; + CharT modified = CharT{}; + for (; *fmt; ++fmt) + { + switch (*fmt) + { + case 'a': + case 'A': + if (command) + { + if (modified == CharT{}) + { + tm.tm_wday = static_cast(extract_weekday(os, fds)); + if (os.fail()) + return os; +#if !ONLY_C_LOCALE + const CharT f[] = {'%', *fmt}; + facet.put(os, os, os.fill(), &tm, begin(f), end(f)); +#else // ONLY_C_LOCALE + os << weekday_names().first[tm.tm_wday+7*(*fmt == 'a')]; +#endif // ONLY_C_LOCALE + } + else + { + os << CharT{'%'} << modified << *fmt; + modified = CharT{}; + } + command = nullptr; + } + else + os << *fmt; + break; + case 'b': + case 'B': + case 'h': + if (command) + { + if (modified == CharT{}) + { + tm.tm_mon = static_cast(extract_month(os, fds)) - 1; +#if !ONLY_C_LOCALE + const CharT f[] = {'%', *fmt}; + facet.put(os, os, os.fill(), &tm, begin(f), end(f)); +#else // ONLY_C_LOCALE + os << month_names().first[tm.tm_mon+12*(*fmt == 'b')]; +#endif // ONLY_C_LOCALE + } + else + { + os << CharT{'%'} << modified << *fmt; + modified = CharT{}; + } + command = nullptr; + } + else + os << *fmt; + break; + case 'c': + case 'x': + if (command) + { + if (modified == CharT{'O'}) + os << CharT{'%'} << modified << *fmt; + else + { +#if !ONLY_C_LOCALE + tm = std::tm{}; + auto const& ymd = fds.ymd; + auto ld = local_days(ymd); + tm.tm_sec = static_cast(fds.tod.seconds().count()); + tm.tm_min = static_cast(fds.tod.minutes().count()); + tm.tm_hour = static_cast(fds.tod.hours().count()); + tm.tm_mday = static_cast(static_cast(ymd.day())); + tm.tm_mon = static_cast(extract_month(os, fds) - 1); + tm.tm_year = static_cast(ymd.year()) - 1900; + tm.tm_wday = static_cast(extract_weekday(os, fds)); + if (os.fail()) + return os; + tm.tm_yday = static_cast((ld - local_days(ymd.year()/1/1)).count()); + CharT f[3] = {'%'}; + auto fe = begin(f) + 1; + if (modified == CharT{'E'}) + *fe++ = modified; + *fe++ = *fmt; + facet.put(os, os, os.fill(), &tm, begin(f), fe); +#else // ONLY_C_LOCALE + if (*fmt == 'c') 
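+ // without locale support, fall back to an asctime-style "Www Mmm dd hh:mm:ss yyyy"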
+ { + auto wd = static_cast(extract_weekday(os, fds)); + os << weekday_names().first[static_cast(wd)+7] + << ' '; + os << month_names().first[extract_month(os, fds)-1+12] << ' '; + auto d = static_cast(static_cast(fds.ymd.day())); + if (d < 10) + os << ' '; + os << d << ' ' + << make_time(duration_cast(fds.tod.to_duration())) + << ' ' << fds.ymd.year(); + + } + else // *fmt == 'x' + { + auto const& ymd = fds.ymd; + save_stream _(os); + os.fill('0'); + os.flags(std::ios::dec | std::ios::right); + os.width(2); + os << static_cast(ymd.month()) << CharT{'/'}; + os.width(2); + os << static_cast(ymd.day()) << CharT{'/'}; + os.width(2); + os << static_cast(ymd.year()) % 100; + } +#endif // ONLY_C_LOCALE + } + command = nullptr; + modified = CharT{}; + } + else + os << *fmt; + break; + case 'C': + if (command) + { + auto y = static_cast(fds.ymd.year()); +#if !ONLY_C_LOCALE + if (modified == CharT{}) + { +#endif + save_stream _(os); + os.fill('0'); + os.flags(std::ios::dec | std::ios::right); + if (y >= 0) + { + os.width(2); + os << y/100; + } + else + { + os << CharT{'-'}; + os.width(2); + os << -(y-99)/100; + } +#if !ONLY_C_LOCALE + } + else if (modified == CharT{'E'}) + { + tm.tm_year = y - 1900; + CharT f[3] = {'%', 'E', 'C'}; + facet.put(os, os, os.fill(), &tm, begin(f), end(f)); + } + else + { + os << CharT{'%'} << modified << *fmt; + } +#endif + command = nullptr; + modified = CharT{}; + } + else + os << *fmt; + break; + case 'd': + case 'e': + if (command) + { + auto d = static_cast(static_cast(fds.ymd.day())); +#if !ONLY_C_LOCALE + if (modified == CharT{}) + { +#endif + save_stream _(os); + if (*fmt == CharT{'d'}) + os.fill('0'); + os.flags(std::ios::dec | std::ios::right); + os.width(2); + os << d; +#if !ONLY_C_LOCALE + } + else if (modified == CharT{'O'}) + { + tm.tm_mday = d; + CharT f[3] = {'%', 'O', *fmt}; + facet.put(os, os, os.fill(), &tm, begin(f), end(f)); + } + else + { + os << CharT{'%'} << modified << *fmt; + } +#endif + command = nullptr; + modified = CharT{}; + } + else + os << *fmt; + break; + case 'D': + if (command) + { + if (modified == CharT{}) + { + auto const& ymd = fds.ymd; + save_stream _(os); + os.fill('0'); + os.flags(std::ios::dec | std::ios::right); + os.width(2); + os << static_cast(ymd.month()) << CharT{'/'}; + os.width(2); + os << static_cast(ymd.day()) << CharT{'/'}; + os.width(2); + os << static_cast(ymd.year()) % 100; + } + else + { + os << CharT{'%'} << modified << *fmt; + modified = CharT{}; + } + command = nullptr; + } + else + os << *fmt; + break; + case 'F': + if (command) + { + if (modified == CharT{}) + { + auto const& ymd = fds.ymd; + save_stream _(os); + os.fill('0'); + os.flags(std::ios::dec | std::ios::right); + os.width(4); + os << static_cast(ymd.year()) << CharT{'-'}; + os.width(2); + os << static_cast(ymd.month()) << CharT{'-'}; + os.width(2); + os << static_cast(ymd.day()); + } + else + { + os << CharT{'%'} << modified << *fmt; + modified = CharT{}; + } + command = nullptr; + } + else + os << *fmt; + break; + case 'g': + case 'G': + if (command) + { + if (modified == CharT{}) + { + auto ld = local_days(fds.ymd); + auto y = year_month_day{ld + days{3}}.year(); + auto start = local_days((y - years{1})/date::dec/thu[last]) + (mon-thu); + if (ld < start) + --y; + if (*fmt == CharT{'G'}) + os << y; + else + { + save_stream _(os); + os.fill('0'); + os.flags(std::ios::dec | std::ios::right); + os.width(2); + os << std::abs(static_cast(y)) % 100; + } + } + else + { + os << CharT{'%'} << modified << *fmt; + modified = CharT{}; + } + command = 
nullptr; + } + else + os << *fmt; + break; + case 'H': + case 'I': + if (command) + { + auto hms = fds.tod; +#if !ONLY_C_LOCALE + if (modified == CharT{}) + { +#endif + if (*fmt == CharT{'I'}) + hms.make12(); + if (hms.hours() < hours{10}) + os << CharT{'0'}; + os << hms.hours().count(); +#if !ONLY_C_LOCALE + } + else if (modified == CharT{'O'}) + { + const CharT f[] = {'%', modified, *fmt}; + tm.tm_hour = static_cast(hms.hours().count()); + facet.put(os, os, os.fill(), &tm, begin(f), end(f)); + } + else + { + os << CharT{'%'} << modified << *fmt; + } +#endif + modified = CharT{}; + command = nullptr; + } + else + os << *fmt; + break; + case 'j': + if (command) + { + if (modified == CharT{}) + { + auto ld = local_days(fds.ymd); + auto y = fds.ymd.year(); + auto doy = ld - local_days(y/jan/1) + days{1}; + save_stream _(os); + os.fill('0'); + os.flags(std::ios::dec | std::ios::right); + os.width(3); + os << doy.count(); + } + else + { + os << CharT{'%'} << modified << *fmt; + modified = CharT{}; + } + command = nullptr; + } + else + os << *fmt; + break; + case 'm': + if (command) + { + auto m = static_cast(fds.ymd.month()); +#if !ONLY_C_LOCALE + if (modified == CharT{}) + { +#endif + if (m < 10) + os << CharT{'0'}; + os << m; +#if !ONLY_C_LOCALE + } + else if (modified == CharT{'O'}) + { + const CharT f[] = {'%', modified, *fmt}; + tm.tm_mon = static_cast(m-1); + facet.put(os, os, os.fill(), &tm, begin(f), end(f)); + } + else + { + os << CharT{'%'} << modified << *fmt; + } +#endif + modified = CharT{}; + command = nullptr; + } + else + os << *fmt; + break; + case 'M': + if (command) + { +#if !ONLY_C_LOCALE + if (modified == CharT{}) + { +#endif + if (fds.tod.minutes() < minutes{10}) + os << CharT{'0'}; + os << fds.tod.minutes().count(); +#if !ONLY_C_LOCALE + } + else if (modified == CharT{'O'}) + { + const CharT f[] = {'%', modified, *fmt}; + tm.tm_min = static_cast(fds.tod.minutes().count()); + facet.put(os, os, os.fill(), &tm, begin(f), end(f)); + } + else + { + os << CharT{'%'} << modified << *fmt; + } +#endif + modified = CharT{}; + command = nullptr; + } + else + os << *fmt; + break; + case 'n': + if (command) + { + if (modified == CharT{}) + os << CharT{'\n'}; + else + { + os << CharT{'%'} << modified << *fmt; + modified = CharT{}; + } + command = nullptr; + } + else + os << *fmt; + break; + case 'p': + if (command) + { +#if !ONLY_C_LOCALE + if (modified == CharT{}) + { + const CharT f[] = {'%', *fmt}; + tm.tm_hour = static_cast(fds.tod.hours().count()); + facet.put(os, os, os.fill(), &tm, begin(f), end(f)); + } + else + { + os << CharT{'%'} << modified << *fmt; + } +#else + if (fds.tod.hours() < hours{12}) + os << ampm_names().first[0]; + else + os << ampm_names().first[1]; +#endif + modified = CharT{}; + command = nullptr; + } + else + os << *fmt; + break; + case 'r': + if (command) + { +#if !ONLY_C_LOCALE + if (modified == CharT{}) + { + const CharT f[] = {'%', *fmt}; + tm.tm_hour = static_cast(fds.tod.hours().count()); + tm.tm_min = static_cast(fds.tod.minutes().count()); + tm.tm_sec = static_cast(fds.tod.seconds().count()); + facet.put(os, os, os.fill(), &tm, begin(f), end(f)); + } + else + { + os << CharT{'%'} << modified << *fmt; + } +#else + time_of_day tod(duration_cast(fds.tod.to_duration())); + tod.make12(); + save_stream _(os); + os.fill('0'); + os.width(2); + os << tod.hours().count() << CharT{':'}; + os.width(2); + os << tod.minutes().count() << CharT{':'}; + os.width(2); + os << tod.seconds().count() << CharT{' '}; + tod.make24(); + if (tod.hours() < hours{12}) + os << 
ampm_names().first[0]; + else + os << ampm_names().first[1]; +#endif + modified = CharT{}; + command = nullptr; + } + else + os << *fmt; + break; + case 'R': + if (command) + { + if (modified == CharT{}) + { + if (fds.tod.hours() < hours{10}) + os << CharT{'0'}; + os << fds.tod.hours().count() << CharT{':'}; + if (fds.tod.minutes() < minutes{10}) + os << CharT{'0'}; + os << fds.tod.minutes().count(); + } + else + { + os << CharT{'%'} << modified << *fmt; + modified = CharT{}; + } + command = nullptr; + } + else + os << *fmt; + break; + case 'S': + if (command) + { +#if !ONLY_C_LOCALE + if (modified == CharT{}) + { +#endif + os << fds.tod.s_; +#if !ONLY_C_LOCALE + } + else if (modified == CharT{'O'}) + { + const CharT f[] = {'%', modified, *fmt}; + tm.tm_sec = static_cast(fds.tod.s_.seconds().count()); + facet.put(os, os, os.fill(), &tm, begin(f), end(f)); + } + else + { + os << CharT{'%'} << modified << *fmt; + } +#endif + modified = CharT{}; + command = nullptr; + } + else + os << *fmt; + break; + case 't': + if (command) + { + if (modified == CharT{}) + os << CharT{'\t'}; + else + { + os << CharT{'%'} << modified << *fmt; + modified = CharT{}; + } + command = nullptr; + } + else + os << *fmt; + break; + case 'T': + if (command) + { + if (modified == CharT{}) + { + os << fds.tod; + } + else + { + os << CharT{'%'} << modified << *fmt; + modified = CharT{}; + } + command = nullptr; + } + else + os << *fmt; + break; + case 'u': + if (command) + { + auto wd = extract_weekday(os, fds); + if (os.fail()) + return os; +#if !ONLY_C_LOCALE + if (modified == CharT{}) + { +#endif + os << (wd != 0 ? wd : 7u); +#if !ONLY_C_LOCALE + } + else if (modified == CharT{'O'}) + { + const CharT f[] = {'%', modified, *fmt}; + tm.tm_wday = static_cast(wd); + facet.put(os, os, os.fill(), &tm, begin(f), end(f)); + } + else + { + os << CharT{'%'} << modified << *fmt; + } +#endif + modified = CharT{}; + command = nullptr; + } + else + os << *fmt; + break; + case 'U': + if (command) + { + auto const& ymd = fds.ymd; + auto ld = local_days(ymd); +#if !ONLY_C_LOCALE + if (modified == CharT{}) + { +#endif + auto st = local_days(sun[1]/jan/ymd.year()); + if (ld < st) + os << CharT{'0'} << CharT{'0'}; + else + { + auto wn = duration_cast(ld - st).count() + 1; + if (wn < 10) + os << CharT{'0'}; + os << wn; + } + #if !ONLY_C_LOCALE + } + else if (modified == CharT{'O'}) + { + const CharT f[] = {'%', modified, *fmt}; + tm.tm_year = static_cast(ymd.year()) - 1900; + tm.tm_wday = static_cast(extract_weekday(os, fds)); + if (os.fail()) + return os; + tm.tm_yday = static_cast((ld - local_days(ymd.year()/1/1)).count()); + facet.put(os, os, os.fill(), &tm, begin(f), end(f)); + } + else + { + os << CharT{'%'} << modified << *fmt; + } +#endif + modified = CharT{}; + command = nullptr; + } + else + os << *fmt; + break; + case 'V': + if (command) + { + auto ld = local_days(fds.ymd); +#if !ONLY_C_LOCALE + if (modified == CharT{}) + { +#endif + auto y = year_month_day{ld + days{3}}.year(); + auto st = local_days((y - years{1})/12/thu[last]) + (mon-thu); + if (ld < st) + { + --y; + st = local_days((y - years{1})/12/thu[last]) + (mon-thu); + } + auto wn = duration_cast(ld - st).count() + 1; + if (wn < 10) + os << CharT{'0'}; + os << wn; +#if !ONLY_C_LOCALE + } + else if (modified == CharT{'O'}) + { + const CharT f[] = {'%', modified, *fmt}; + auto const& ymd = fds.ymd; + tm.tm_year = static_cast(ymd.year()) - 1900; + tm.tm_wday = static_cast(extract_weekday(os, fds)); + if (os.fail()) + return os; + tm.tm_yday = static_cast((ld - 
local_days(ymd.year()/1/1)).count()); + facet.put(os, os, os.fill(), &tm, begin(f), end(f)); + } + else + { + os << CharT{'%'} << modified << *fmt; + } +#endif + modified = CharT{}; + command = nullptr; + } + else + os << *fmt; + break; + case 'w': + if (command) + { + auto wd = extract_weekday(os, fds); + if (os.fail()) + return os; +#if !ONLY_C_LOCALE + if (modified == CharT{}) + { +#endif + os << wd; +#if !ONLY_C_LOCALE + } + else if (modified == CharT{'O'}) + { + const CharT f[] = {'%', modified, *fmt}; + tm.tm_wday = static_cast(wd); + facet.put(os, os, os.fill(), &tm, begin(f), end(f)); + } + else + { + os << CharT{'%'} << modified << *fmt; + } +#endif + modified = CharT{}; + command = nullptr; + } + else + os << *fmt; + break; + case 'W': + if (command) + { + auto const& ymd = fds.ymd; + auto ld = local_days(ymd); +#if !ONLY_C_LOCALE + if (modified == CharT{}) + { +#endif + auto st = local_days(mon[1]/jan/ymd.year()); + if (ld < st) + os << CharT{'0'} << CharT{'0'}; + else + { + auto wn = duration_cast(ld - st).count() + 1; + if (wn < 10) + os << CharT{'0'}; + os << wn; + } +#if !ONLY_C_LOCALE + } + else if (modified == CharT{'O'}) + { + const CharT f[] = {'%', modified, *fmt}; + tm.tm_year = static_cast(ymd.year()) - 1900; + tm.tm_wday = static_cast(extract_weekday(os, fds)); + if (os.fail()) + return os; + tm.tm_yday = static_cast((ld - local_days(ymd.year()/1/1)).count()); + facet.put(os, os, os.fill(), &tm, begin(f), end(f)); + } + else + { + os << CharT{'%'} << modified << *fmt; + } +#endif + modified = CharT{}; + command = nullptr; + } + else + os << *fmt; + break; + case 'X': + if (command) + { +#if !ONLY_C_LOCALE + if (modified == CharT{'O'}) + os << CharT{'%'} << modified << *fmt; + else + { + tm = std::tm{}; + tm.tm_sec = static_cast(fds.tod.seconds().count()); + tm.tm_min = static_cast(fds.tod.minutes().count()); + tm.tm_hour = static_cast(fds.tod.hours().count()); + CharT f[3] = {'%'}; + auto fe = begin(f) + 1; + if (modified == CharT{'E'}) + *fe++ = modified; + *fe++ = *fmt; + facet.put(os, os, os.fill(), &tm, begin(f), fe); + } +#else + os << fds.tod; +#endif + command = nullptr; + modified = CharT{}; + } + else + os << *fmt; + break; + case 'y': + if (command) + { + auto y = static_cast(fds.ymd.year()); +#if !ONLY_C_LOCALE + if (modified == CharT{}) + { +#endif + y = std::abs(y) % 100; + if (y < 10) + os << CharT{'0'}; + os << y; +#if !ONLY_C_LOCALE + } + else + { + const CharT f[] = {'%', modified, *fmt}; + tm.tm_year = y - 1900; + facet.put(os, os, os.fill(), &tm, begin(f), end(f)); + } +#endif + modified = CharT{}; + command = nullptr; + } + else + os << *fmt; + break; + case 'Y': + if (command) + { + auto y = fds.ymd.year(); +#if !ONLY_C_LOCALE + if (modified == CharT{}) + { +#endif + os << y; +#if !ONLY_C_LOCALE + } + else if (modified == CharT{'E'}) + { + const CharT f[] = {'%', modified, *fmt}; + tm.tm_year = static_cast(y) - 1900; + facet.put(os, os, os.fill(), &tm, begin(f), end(f)); + } + else + { + os << CharT{'%'} << modified << *fmt; + } +#endif + modified = CharT{}; + command = nullptr; + } + else + os << *fmt; + break; + case 'z': + if (command) + { + if (offset_sec == nullptr) + { + // Can not format %z with unknown offset + os.setstate(ios::failbit); + return os; + } + auto m = duration_cast(*offset_sec); + auto neg = m < minutes{0}; + m = date::abs(m); + auto h = duration_cast(m); + m -= h; + if (neg) + os << CharT{'-'}; + else + os << CharT{'+'}; + if (h < hours{10}) + os << CharT{'0'}; + os << h.count(); + if (modified != CharT{}) + os << 
CharT{':'}; + if (m < minutes{10}) + os << CharT{'0'}; + os << m.count(); + command = nullptr; + modified = CharT{}; + } + else + os << *fmt; + break; + case 'Z': + if (command) + { + if (modified == CharT{}) + { + if (abbrev == nullptr) + { + // Can not format %Z with unknown time_zone + os.setstate(ios::failbit); + return os; + } + for (auto c : *abbrev) + os << CharT(c); + } + else + { + os << CharT{'%'} << modified << *fmt; + modified = CharT{}; + } + command = nullptr; + } + else + os << *fmt; + break; + case 'E': + case 'O': + if (command) + { + if (modified == CharT{}) + { + modified = *fmt; + } + else + { + os << CharT{'%'} << modified << *fmt; + command = nullptr; + modified = CharT{}; + } + } + else + os << *fmt; + break; + case '%': + if (command) + { + if (modified == CharT{}) + { + os << CharT{'%'}; + command = nullptr; + } + else + { + os << CharT{'%'} << modified << CharT{'%'}; + command = nullptr; + modified = CharT{}; + } + } + else + command = fmt; + break; + default: + if (command) + { + os << CharT{'%'}; + command = nullptr; + } + if (modified != CharT{}) + { + os << modified; + modified = CharT{}; + } + os << *fmt; + break; + } + } + if (command) + os << CharT{'%'}; + if (modified != CharT{}) + os << modified; + return os; +} + +template +inline +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, const year& y) +{ + using CT = std::chrono::seconds; + fields fds{y/0/0}; + return to_stream(os, fmt, fds); +} + +template +inline +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, const month& m) +{ + using CT = std::chrono::seconds; + fields fds{m/0/0}; + return to_stream(os, fmt, fds); +} + +template +inline +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, const day& d) +{ + using CT = std::chrono::seconds; + fields fds{d/0/0}; + return to_stream(os, fmt, fds); +} + +template +inline +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, const weekday& wd) +{ + using CT = std::chrono::seconds; + fields fds{wd}; + return to_stream(os, fmt, fds); +} + +template +inline +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, const year_month& ym) +{ + using CT = std::chrono::seconds; + fields fds{ym/0}; + return to_stream(os, fmt, fds); +} + +template +inline +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, const month_day& md) +{ + using CT = std::chrono::seconds; + fields fds{md/0}; + return to_stream(os, fmt, fds); +} + +template +inline +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, + const year_month_day& ymd) +{ + using CT = std::chrono::seconds; + fields fds{ymd}; + return to_stream(os, fmt, fds); +} + +template +inline +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, + const std::chrono::duration& d) +{ + using Duration = std::chrono::duration; + using CT = typename std::common_type::type; + fields fds{time_of_day{d}}; + return to_stream(os, fmt, fds); +} + +template +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, + const local_time& tp, const std::string* abbrev = nullptr, + const std::chrono::seconds* offset_sec = nullptr) +{ + using CT = typename std::common_type::type; + auto ld = floor(tp); + fields fds{year_month_day{ld}, time_of_day{tp-local_seconds{ld}}}; + return to_stream(os, fmt, fds, abbrev, offset_sec); +} + +template +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, + const sys_time& tp) +{ + using namespace std::chrono; + 
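// sys_time is always rendered as UTC here: %Z prints "UTC" and %z prints +0000 + 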
using CT = typename std::common_type::type; + const std::string abbrev("UTC"); + CONSTDATA seconds offset{0}; + auto sd = floor(tp); + fields fds{year_month_day{sd}, time_of_day{tp-sys_seconds{sd}}}; + return to_stream(os, fmt, fds, &abbrev, &offset); +} + +// format + +template +auto +format(const std::locale& loc, const CharT* fmt, const Streamable& tp) + -> decltype(to_stream(std::declval&>(), fmt, tp), + std::basic_string{}) +{ + std::basic_ostringstream os; + os.exceptions(std::ios::failbit | std::ios::badbit); + os.imbue(loc); + to_stream(os, fmt, tp); + return os.str(); +} + +template +auto +format(const CharT* fmt, const Streamable& tp) + -> decltype(to_stream(std::declval&>(), fmt, tp), + std::basic_string{}) +{ + std::basic_ostringstream os; + os.exceptions(std::ios::failbit | std::ios::badbit); + to_stream(os, fmt, tp); + return os.str(); +} + +template +auto +format(const std::locale& loc, const std::basic_string& fmt, + const Streamable& tp) + -> decltype(to_stream(std::declval&>(), fmt.c_str(), tp), + std::basic_string{}) +{ + std::basic_ostringstream os; + os.exceptions(std::ios::failbit | std::ios::badbit); + os.imbue(loc); + to_stream(os, fmt.c_str(), tp); + return os.str(); +} + +template +auto +format(const std::basic_string& fmt, const Streamable& tp) + -> decltype(to_stream(std::declval&>(), fmt.c_str(), tp), + std::basic_string{}) +{ + std::basic_ostringstream os; + os.exceptions(std::ios::failbit | std::ios::badbit); + to_stream(os, fmt.c_str(), tp); + return os.str(); +} + +// parse + +namespace detail +{ + +template +bool +read_char(std::basic_istream& is, CharT fmt, std::ios::iostate& err) +{ + auto ic = is.get(); + if (Traits::eq_int_type(ic, Traits::eof()) || + !Traits::eq(Traits::to_char_type(ic), fmt)) + { + err |= std::ios::failbit; + is.setstate(std::ios::failbit); + return false; + } + return true; +} + +template +unsigned +read_unsigned(std::basic_istream& is, unsigned m = 1, unsigned M = 10) +{ + unsigned x = 0; + unsigned count = 0; + while (true) + { + auto ic = is.peek(); + if (Traits::eq_int_type(ic, Traits::eof())) + break; + auto c = static_cast(Traits::to_char_type(ic)); + if (!('0' <= c && c <= '9')) + break; + (void)is.get(); + ++count; + x = 10*x + static_cast(c - '0'); + if (count == M) + break; + } + if (count < m) + is.setstate(std::ios::failbit); + return x; +} + +template +int +read_signed(std::basic_istream& is, unsigned m = 1, unsigned M = 10) +{ + auto ic = is.peek(); + if (!Traits::eq_int_type(ic, Traits::eof())) + { + auto c = static_cast(Traits::to_char_type(ic)); + if (('0' <= c && c <= '9') || c == '-' || c == '+') + { + if (c == '-' || c == '+') + (void)is.get(); + auto x = static_cast(read_unsigned(is, std::max(m, 1u), M)); + if (!is.fail()) + { + if (c == '-') + x = -x; + return x; + } + } + } + if (m > 0) + is.setstate(std::ios::failbit); + return 0; +} + +template +long double +read_long_double(std::basic_istream& is, unsigned m = 1, unsigned M = 10) +{ + using namespace std; + unsigned count = 0; + auto decimal_point = Traits::to_int_type( + use_facet>(is.getloc()).decimal_point()); + std::string buf; + while (true) + { + auto ic = is.peek(); + if (Traits::eq_int_type(ic, Traits::eof())) + break; + if (Traits::eq_int_type(ic, decimal_point)) + { + buf += '.'; + decimal_point = Traits::eof(); + is.get(); + } + else + { + auto c = static_cast(Traits::to_char_type(ic)); + if (!('0' <= c && c <= '9')) + break; + buf += c; + (void)is.get(); + } + if (++count == M) + break; + } + if (count < m) + { + is.setstate(std::ios::failbit); 
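+ // fewer than the required minimum of m characters were consumed: fail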
+ return 0; + } + return std::stold(buf); +} + +struct rs +{ + int& i; + unsigned m; + unsigned M; +}; + +struct ru +{ + int& i; + unsigned m; + unsigned M; +}; + +struct rld +{ + long double& i; + unsigned m; + unsigned M; +}; + +template +void +read(std::basic_istream&) +{ +} + +template +void +read(std::basic_istream& is, CharT a0, Args&& ...args); + +template +void +read(std::basic_istream& is, rs a0, Args&& ...args); + +template +void +read(std::basic_istream& is, ru a0, Args&& ...args); + +template +void +read(std::basic_istream& is, int a0, Args&& ...args); + +template +void +read(std::basic_istream& is, rld a0, Args&& ...args); + +template +void +read(std::basic_istream& is, CharT a0, Args&& ...args) +{ + // No-op if a0 == CharT{} + if (a0 != CharT{}) + { + auto ic = is.peek(); + if (Traits::eq_int_type(ic, Traits::eof())) + { + is.setstate(std::ios::failbit | std::ios::eofbit); + return; + } + if (!Traits::eq(Traits::to_char_type(ic), a0)) + { + is.setstate(std::ios::failbit); + return; + } + (void)is.get(); + } + read(is, std::forward(args)...); +} + +template +void +read(std::basic_istream& is, rs a0, Args&& ...args) +{ + auto x = read_signed(is, a0.m, a0.M); + if (is.fail()) + return; + a0.i = x; + read(is, std::forward(args)...); +} + +template +void +read(std::basic_istream& is, ru a0, Args&& ...args) +{ + auto x = read_unsigned(is, a0.m, a0.M); + if (is.fail()) + return; + a0.i = static_cast(x); + read(is, std::forward(args)...); +} + +template +void +read(std::basic_istream& is, int a0, Args&& ...args) +{ + if (a0 != -1) + { + auto u = static_cast(a0); + CharT buf[std::numeric_limits::digits10+2] = {}; + auto e = buf; + do + { + *e++ = CharT(u % 10) + CharT{'0'}; + u /= 10; + } while (u > 0); + std::reverse(buf, e); + for (auto p = buf; p != e && is.rdstate() == std::ios::goodbit; ++p) + read(is, *p); + } + if (is.rdstate() == std::ios::goodbit) + read(is, std::forward(args)...); +} + +template +void +read(std::basic_istream& is, rld a0, Args&& ...args) +{ + auto x = read_long_double(is, a0.m, a0.M); + if (is.fail()) + return; + a0.i = x; + read(is, std::forward(args)...); +} + +} // namespace detail; + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, + fields& fds, std::basic_string* abbrev, + std::chrono::minutes* offset) +{ + using namespace std; + using namespace std::chrono; + typename basic_istream::sentry ok{is, true}; + if (ok) + { +#if !ONLY_C_LOCALE + auto& f = use_facet>(is.getloc()); + std::tm tm{}; +#endif + std::basic_string temp_abbrev; + minutes temp_offset{}; + const CharT* command = nullptr; + auto modified = CharT{}; + auto width = -1; + CONSTDATA int not_a_year = numeric_limits::min(); + int Y = not_a_year; + CONSTDATA int not_a_century = not_a_year / 100; + int C = not_a_century; + CONSTDATA int not_a_2digit_year = 100; + int y = not_a_2digit_year; + int m{}; + int d{}; + int j{}; + CONSTDATA int not_a_weekday = 7; + int wd = not_a_weekday; + CONSTDATA int not_a_hour_12_value = 0; + int I = not_a_hour_12_value; + hours h{}; + minutes min{}; + Duration s{}; + int g = not_a_2digit_year; + int G = not_a_year; + CONSTDATA int not_a_week_num = 100; + int V = not_a_week_num; + int U = not_a_week_num; + int W = not_a_week_num; + using detail::read; + using detail::rs; + using detail::ru; + using detail::rld; + for (; *fmt && is.rdstate() == std::ios::goodbit; ++fmt) + { + switch (*fmt) + { + case 'a': + case 'A': + if (command) + { +#if !ONLY_C_LOCALE + ios_base::iostate err = ios_base::goodbit; + f.get(is, nullptr, is, 
err, &tm, command, fmt+1); + if ((err & ios::failbit) == 0) + wd = tm.tm_wday; + is.setstate(err); +#else + auto nm = detail::weekday_names(); + auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first; + if (!is.fail()) + wd = i % 7; +#endif + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'b': + case 'B': + case 'h': + if (command) + { +#if !ONLY_C_LOCALE + ios_base::iostate err = ios_base::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + if ((err & ios::failbit) == 0) + m = tm.tm_mon + 1; + is.setstate(err); +#else + auto nm = detail::month_names(); + auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first; + if (!is.fail()) + m = i % 12 + 1; +#endif + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'c': + if (command) + { +#if !ONLY_C_LOCALE + ios_base::iostate err = ios_base::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + if ((err & ios::failbit) == 0) + { + Y = tm.tm_year + 1900; + m = tm.tm_mon + 1; + d = tm.tm_mday; + h = hours{tm.tm_hour}; + min = minutes{tm.tm_min}; + s = duration_cast(seconds{tm.tm_sec}); + } + is.setstate(err); +#else + auto nm = detail::weekday_names(); + auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first; + if (is.fail()) + goto broken; + wd = i % 7; + ws(is); + nm = detail::month_names(); + i = detail::scan_keyword(is, nm.first, nm.second) - nm.first; + if (is.fail()) + goto broken; + m = i % 12 + 1; + ws(is); + read(is, rs{d, 1, 2}); + if (is.fail()) + goto broken; + ws(is); + using dfs = detail::decimal_format_seconds; + CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width; + int H; + int M; + long double S; + read(is, ru{H, 1, 2}, CharT{':'}, ru{M, 1, 2}, + CharT{':'}, rld{S, 1, w}); + if (is.fail()) + goto broken; + h = hours{H}; + min = minutes{M}; + s = round(duration{S}); + ws(is); + read(is, rs{Y, 1, 4u}); +#endif + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'x': + if (command) + { +#if !ONLY_C_LOCALE + ios_base::iostate err = ios_base::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + if ((err & ios::failbit) == 0) + { + Y = tm.tm_year + 1900; + m = tm.tm_mon + 1; + d = tm.tm_mday; + } + is.setstate(err); +#else + read(is, ru{m, 1, 2}, CharT{'/'}, ru{d, 1, 2}, CharT{'/'}, + rs{y, 1, 2}); +#endif + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'X': + if (command) + { +#if !ONLY_C_LOCALE + ios_base::iostate err = ios_base::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + if ((err & ios::failbit) == 0) + { + h = hours{tm.tm_hour}; + min = minutes{tm.tm_min}; + s = duration_cast(seconds{tm.tm_sec}); + } + is.setstate(err); +#else + using dfs = detail::decimal_format_seconds; + CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width; + int H; + int M; + long double S; + read(is, ru{H, 1, 2}, CharT{':'}, ru{M, 1, 2}, + CharT{':'}, rld{S, 1, w}); + if (!is.fail()) + { + h = hours{H}; + min = minutes{M}; + s = round(duration{S}); + } +#endif + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'C': + if (command) + { +#if !ONLY_C_LOCALE + if (modified == CharT{}) + { +#endif + read(is, rs{C, 1, width == -1 ? 
2u : static_cast(width)}); +#if !ONLY_C_LOCALE + } + else + { + ios_base::iostate err = ios_base::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + if ((err & ios::failbit) == 0) + { + auto tY = tm.tm_year + 1900; + C = (tY >= 0 ? tY : tY-99) / 100; + } + is.setstate(err); + } +#endif + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'D': + if (command) + { + if (modified == CharT{}) + read(is, ru{m, 1, 2}, CharT{'\0'}, CharT{'/'}, CharT{'\0'}, + ru{d, 1, 2}, CharT{'\0'}, CharT{'/'}, CharT{'\0'}, + rs{y, 1, 2}); + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'F': + if (command) + { + if (modified == CharT{}) + read(is, rs{Y, 1, width == -1 ? 4u : static_cast(width)}, + CharT{'-'}, ru{m, 1, 2}, CharT{'-'}, ru{d, 1, 2}); + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'd': + case 'e': + if (command) + { +#if !ONLY_C_LOCALE + if (modified == CharT{}) +#endif + read(is, rs{d, 1, width == -1 ? 2u : static_cast(width)}); +#if !ONLY_C_LOCALE + else if (modified == CharT{'O'}) + { + ios_base::iostate err = ios_base::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + command = nullptr; + width = -1; + modified = CharT{}; + if ((err & ios::failbit) == 0) + d = tm.tm_mday; + is.setstate(err); + } + else + read(is, CharT{'%'}, width, modified, *fmt); +#endif + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'H': + if (command) + { +#if !ONLY_C_LOCALE + if (modified == CharT{}) + { +#endif + int H; + read(is, ru{H, 1, width == -1 ? 2u : static_cast(width)}); + if (!is.fail()) + h = hours{H}; +#if !ONLY_C_LOCALE + } + else if (modified == CharT{'O'}) + { + ios_base::iostate err = ios_base::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + if ((err & ios::failbit) == 0) + h = hours{tm.tm_hour}; + is.setstate(err); + } + else + read(is, CharT{'%'}, width, modified, *fmt); +#endif + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'I': + if (command) + { + if (modified == CharT{}) + { + // reads an hour into I, which must be in [1, 12] + read(is, rs{I, 1, width == -1 ? 2u : static_cast(width)}); + if (I != not_a_hour_12_value) + { + if (!(1 <= I && I <= 12)) + { + I = not_a_hour_12_value; + goto broken; + } + } + } + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'j': + if (command) + { + if (modified == CharT{}) + read(is, ru{j, 1, width == -1 ? 3u : static_cast(width)}); + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'M': + if (command) + { +#if !ONLY_C_LOCALE + if (modified == CharT{}) + { +#endif + int M; + read(is, ru{M, 1, width == -1 ? 
2u : static_cast(width)}); + if (!is.fail()) + min = minutes{M}; +#if !ONLY_C_LOCALE + } + else if (modified == CharT{'O'}) + { + ios_base::iostate err = ios_base::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + if ((err & ios::failbit) == 0) + min = minutes{tm.tm_min}; + is.setstate(err); + } + else + read(is, CharT{'%'}, width, modified, *fmt); +#endif + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'm': + if (command) + { +#if !ONLY_C_LOCALE + if (modified == CharT{}) +#endif + read(is, rs{m, 1, width == -1 ? 2u : static_cast(width)}); +#if !ONLY_C_LOCALE + else if (modified == CharT{'O'}) + { + ios_base::iostate err = ios_base::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + if ((err & ios::failbit) == 0) + m = tm.tm_mon + 1; + is.setstate(err); + } + else + read(is, CharT{'%'}, width, modified, *fmt); +#endif + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'n': + case 't': + if (command) + { + // %n matches a single white space character + // %t matches 0 or 1 white space characters + auto ic = is.peek(); + if (Traits::eq_int_type(ic, Traits::eof())) + { + ios_base::iostate err = ios_base::eofbit; + if (*fmt == 'n') + err |= ios_base::failbit; + is.setstate(err); + break; + } + if (isspace(ic)) + { + (void)is.get(); + } + else if (*fmt == 'n') + is.setstate(ios_base::failbit); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'p': + // Error if haven't yet seen %I + if (command) + { +#if !ONLY_C_LOCALE + if (modified == CharT{}) + { + if (I == not_a_hour_12_value) + goto broken; + tm = std::tm{}; + tm.tm_hour = I; + ios_base::iostate err = ios_base::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + if (err & ios::failbit) + goto broken; + h = hours{tm.tm_hour}; + I = not_a_hour_12_value; + } + else + read(is, CharT{'%'}, width, modified, *fmt); +#else + if (I == not_a_hour_12_value) + goto broken; + auto nm = detail::ampm_names(); + auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first; + if (is.fail()) + goto broken; + h = hours{I}; + if (i == 1) + { + if (h != hours{12}) + h += hours{12}; + } + else if (h == hours{12}) + h = hours{0}; + I = not_a_hour_12_value; +#endif + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + + break; + case 'r': + if (command) + { +#if !ONLY_C_LOCALE + ios_base::iostate err = ios_base::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + if ((err & ios::failbit) == 0) + { + h = hours{tm.tm_hour}; + min = minutes{tm.tm_min}; + s = duration_cast(seconds{tm.tm_sec}); + } + is.setstate(err); +#else + using dfs = detail::decimal_format_seconds; + CONSTDATA auto w = Duration::period::den == 1 ? 
2 : 3 + dfs::width; + int H; + int M; + long double S; + read(is, ru{H, 1, 2}, CharT{':'}, ru{M, 1, 2}, + CharT{':'}, rld{S, 1, w}); + if (is.fail() || !(1 <= H && H <= 12)) + goto broken; + ws(is); + auto nm = detail::ampm_names(); + auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first; + if (is.fail()) + goto broken; + h = hours{H}; + if (i == 1) + { + if (h != hours{12}) + h += hours{12}; + } + else if (h == hours{12}) + h = hours{0}; + min = minutes{M}; + s = round(duration{S}); +#endif + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'R': + if (command) + { + if (modified == CharT{}) + { + int H, M; + read(is, ru{H, 1, 2}, CharT{'\0'}, CharT{':'}, CharT{'\0'}, + ru{M, 1, 2}, CharT{'\0'}); + if (!is.fail()) + { + h = hours{H}; + min = minutes{M}; + } + } + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'S': + if (command) + { + #if !ONLY_C_LOCALE + if (modified == CharT{}) + { +#endif + using dfs = detail::decimal_format_seconds; + CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width; + long double S; + read(is, rld{S, 1, width == -1 ? w : static_cast(width)}); + if (!is.fail()) + s = round(duration{S}); +#if !ONLY_C_LOCALE + } + else if (modified == CharT{'O'}) + { + ios_base::iostate err = ios_base::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + if ((err & ios::failbit) == 0) + s = duration_cast(seconds{tm.tm_sec}); + is.setstate(err); + } + else + read(is, CharT{'%'}, width, modified, *fmt); +#endif + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'T': + if (command) + { + if (modified == CharT{}) + { + using dfs = detail::decimal_format_seconds; + CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width; + int H; + int M; + long double S; + read(is, ru{H, 1, 2}, CharT{':'}, ru{M, 1, 2}, + CharT{':'}, rld{S, 1, w}); + if (!is.fail()) + { + h = hours{H}; + min = minutes{M}; + s = round(duration{S}); + } + } + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'Y': + if (command) + { +#if !ONLY_C_LOCALE + if (modified == CharT{}) +#endif + read(is, rs{Y, 1, width == -1 ? 4u : static_cast(width)}); +#if !ONLY_C_LOCALE + else if (modified == CharT{'E'}) + { + ios_base::iostate err = ios_base::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + if ((err & ios::failbit) == 0) + Y = tm.tm_year + 1900; + is.setstate(err); + } + else + read(is, CharT{'%'}, width, modified, *fmt); +#endif + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'y': + if (command) + { +#if !ONLY_C_LOCALE + if (modified == CharT{}) +#endif + read(is, ru{y, 1, width == -1 ? 2u : static_cast(width)}); +#if !ONLY_C_LOCALE + else + { + ios_base::iostate err = ios_base::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + if ((err & ios::failbit) == 0) + Y = tm.tm_year + 1900; + is.setstate(err); + } +#endif + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'g': + if (command) + { + if (modified == CharT{}) + read(is, ru{g, 1, width == -1 ? 
2u : static_cast(width)}); + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'G': + if (command) + { + if (modified == CharT{}) + read(is, rs{G, 1, width == -1 ? 4u : static_cast(width)}); + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'U': + if (command) + { + if (modified == CharT{}) + read(is, ru{U, 1, width == -1 ? 2u : static_cast(width)}); + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'V': + if (command) + { + if (modified == CharT{}) + read(is, ru{V, 1, width == -1 ? 2u : static_cast(width)}); + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'W': + if (command) + { + if (modified == CharT{}) + read(is, ru{W, 1, width == -1 ? 2u : static_cast(width)}); + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'u': + case 'w': + if (command) + { +#if !ONLY_C_LOCALE + if (modified == CharT{}) + { +#endif + read(is, ru{wd, 1, width == -1 ? 1u : static_cast(width)}); + if (!is.fail() && *fmt == 'u') + { + if (wd == 7) + wd = 0; + else if (wd == 0) + wd = 7; + } +#if !ONLY_C_LOCALE + } + else if (modified == CharT{'O'}) + { + ios_base::iostate err = ios_base::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + if ((err & ios::failbit) == 0) + wd = tm.tm_wday; + is.setstate(err); + } + else + read(is, CharT{'%'}, width, modified, *fmt); +#endif + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'E': + case 'O': + if (command) + { + if (modified == CharT{}) + { + modified = *fmt; + } + else + { + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + } + else + read(is, *fmt); + break; + case '%': + if (command) + { + if (modified == CharT{}) + read(is, *fmt); + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + command = fmt; + break; + case 'z': + if (command) + { + int H, M; + if (modified == CharT{}) + { + read(is, rs{H, 2, 2}); + if (!is.fail()) + temp_offset = hours{H}; + if (is.good()) + { + auto ic = is.peek(); + if (!Traits::eq_int_type(ic, Traits::eof())) + { + auto c = static_cast(Traits::to_char_type(ic)); + if ('0' <= c && c <= '9') + { + read(is, ru{M, 2, 2}); + if (!is.fail()) + temp_offset += minutes{ H < 0 ? -M : M }; + } + } + } + } + else + { + read(is, rs{H, 1, 2}); + if (!is.fail()) + temp_offset = hours{H}; + if (is.good()) + { + auto ic = is.peek(); + if (!Traits::eq_int_type(ic, Traits::eof())) + { + auto c = static_cast(Traits::to_char_type(ic)); + if (c == ':') + { + (void)is.get(); + read(is, ru{M, 2, 2}); + if (!is.fail()) + temp_offset += minutes{ H < 0 ? 
-M : M }; + } + } + } + } + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'Z': + if (command) + { + if (modified == CharT{}) + { + if (!temp_abbrev.empty()) + is.setstate(ios::failbit); + else + { + while (is.rdstate() == std::ios::goodbit) + { + auto i = is.rdbuf()->sgetc(); + if (Traits::eq_int_type(i, Traits::eof())) + { + is.setstate(ios::eofbit); + break; + } + auto wc = Traits::to_char_type(i); + auto c = static_cast(wc); + // is c a valid time zone name or abbreviation character? + if (!(CharT{1} < wc && wc < CharT{127}) || !(isalnum(c) || + c == '_' || c == '/' || c == '-' || c == '+')) + break; + temp_abbrev.push_back(c); + is.rdbuf()->sbumpc(); + } + if (temp_abbrev.empty()) + is.setstate(ios::failbit); + } + } + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + default: + if (command) + { + if (width == -1 && modified == CharT{} && '0' <= *fmt && *fmt <= '9') + { + width = static_cast(*fmt) - '0'; + while ('0' <= fmt[1] && fmt[1] <= '9') + width = 10*width + static_cast(*++fmt) - '0'; + } + else + { + if (modified == CharT{}) + read(is, CharT{'%'}, width, *fmt); + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + } + else // !command + { + if (isspace(*fmt)) + ws(is); // space matches 0 or more white space characters + else + read(is, *fmt); + } + break; + } + } + // is.rdstate() != ios::goodbit || *fmt == CharT{} + if (is.rdstate() == ios::goodbit && command) + { + if (modified == CharT{}) + read(is, CharT{'%'}, width); + else + read(is, CharT{'%'}, width, modified); + } + if (is.rdstate() != ios::goodbit && *fmt != CharT{} && !is.fail()) + is.setstate(ios::failbit); + if (!is.fail()) + { + if (y != not_a_2digit_year) + { + // Convert y and an optional C to Y + if (!(0 <= y && y <= 99)) + goto broken; + if (C == not_a_century) + { + if (Y == not_a_year) + { + if (y >= 69) + C = 19; + else + C = 20; + } + else + { + C = (Y >= 0 ? Y : Y-100) / 100; + } + } + int tY; + if (C >= 0) + tY = 100*C + y; + else + tY = 100*(C+1) - (y == 0 ? 100 : y); + if (Y != not_a_year && Y != tY) + goto broken; + Y = tY; + } + if (g != not_a_2digit_year) + { + // Convert g and an optional C to G + if (!(0 <= g && g <= 99)) + goto broken; + if (C == not_a_century) + { + if (G == not_a_year) + { + if (g >= 69) + C = 19; + else + C = 20; + } + else + { + C = (G >= 0 ? G : G-100) / 100; + } + } + int tG; + if (C >= 0) + tG = 100*C + g; + else + tG = 100*(C+1) - (g == 0 ? 
100 : g); + if (G != not_a_year && G != tG) + goto broken; + G = tG; + } + if (G != not_a_year) + { + // Convert G, V and wd to Y, m and d + if (V == not_a_week_num || wd == not_a_weekday) + goto broken; + auto ymd = year_month_day{local_days(year{G-1}/dec/thu[last]) + + (mon-thu) + weeks{V-1} + + (weekday{static_cast(wd)}-mon)}; + if (Y == not_a_year) + Y = static_cast(ymd.year()); + else if (year{Y} != ymd.year()) + goto broken; + if (m == 0) + m = static_cast(static_cast(ymd.month())); + else if (month(static_cast(m)) != ymd.month()) + goto broken; + if (d == 0) + d = static_cast(static_cast(ymd.day())); + else if (day(static_cast(d)) != ymd.day()) + goto broken; + } + if (j != 0 && Y != not_a_year) + { + auto ymd = year_month_day{local_days(year{Y}/1/1) + days{j-1}}; + if (m == 0) + m = static_cast(static_cast(ymd.month())); + else if (month(static_cast(m)) != ymd.month()) + goto broken; + if (d == 0) + d = static_cast(static_cast(ymd.day())); + else if (day(static_cast(d)) != ymd.day()) + goto broken; + } + if (U != not_a_week_num && Y != not_a_year) + { + if (wd == not_a_weekday) + goto broken; + sys_days sd; + if (U == 0) + sd = year{Y-1}/dec/weekday{static_cast(wd)}[last]; + else + sd = sys_days(year{Y}/jan/sun[1]) + weeks{U-1} + + (weekday{static_cast(wd)} - sun); + year_month_day ymd = sd; + if (year{Y} != ymd.year()) + goto broken; + if (m == 0) + m = static_cast(static_cast(ymd.month())); + else if (month(static_cast(m)) != ymd.month()) + goto broken; + if (d == 0) + d = static_cast(static_cast(ymd.day())); + else if (day(static_cast(d)) != ymd.day()) + goto broken; + } + if (W != not_a_week_num && Y != not_a_year) + { + if (wd == not_a_weekday) + goto broken; + sys_days sd; + if (W == 0) + sd = year{Y-1}/dec/weekday{static_cast(wd)}[last]; + else + sd = sys_days(year{Y}/jan/mon[1]) + weeks{W-1} + + (weekday{static_cast(wd)} - mon); + year_month_day ymd = sd; + if (year{Y} != ymd.year()) + goto broken; + if (m == 0) + m = static_cast(static_cast(ymd.month())); + else if (month(static_cast(m)) != ymd.month()) + goto broken; + if (d == 0) + d = static_cast(static_cast(ymd.day())); + else if (day(static_cast(d)) != ymd.day()) + goto broken; + } + if (Y < static_cast(year::min()) || Y > static_cast(year::max())) + Y = not_a_year; + auto ymd = year{Y}/m/d; + if (wd != not_a_weekday && ymd.ok()) + { + if (weekday{static_cast(wd)} != weekday(ymd)) + goto broken; + } + fds.ymd = ymd; + fds.tod = time_of_day{h}; + fds.tod.m_ = min; + fds.tod.s_ = detail::decimal_format_seconds{s}; + if (wd != not_a_weekday) + fds.wd = weekday{static_cast(wd)}; + if (abbrev != nullptr) + *abbrev = std::move(temp_abbrev); + if (offset != nullptr) + *offset = temp_offset; + } + return is; + } +broken: + is.setstate(ios_base::failbit); + return is; +} + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, year& y, + std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) +{ + using namespace std; + using namespace std::chrono; + using CT = seconds; + fields fds{}; + from_stream(is, fmt, fds, abbrev, offset); + if (!fds.ymd.year().ok()) + is.setstate(ios::failbit); + if (!is.fail()) + y = fds.ymd.year(); + return is; +} + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, month& m, + std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) +{ + using namespace std; + using namespace std::chrono; + using CT = seconds; + fields fds{}; + from_stream(is, fmt, fds, abbrev, offset); + if 
(!fds.ymd.month().ok()) + is.setstate(ios::failbit); + if (!is.fail()) + m = fds.ymd.month(); + return is; +} + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, day& d, + std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) +{ + using namespace std; + using namespace std::chrono; + using CT = seconds; + fields fds{}; + from_stream(is, fmt, fds, abbrev, offset); + if (!fds.ymd.day().ok()) + is.setstate(ios::failbit); + if (!is.fail()) + d = fds.ymd.day(); + return is; +} + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, weekday& wd, + std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) +{ + using namespace std; + using namespace std::chrono; + using CT = seconds; + fields fds{}; + from_stream(is, fmt, fds, abbrev, offset); + if (!fds.wd.ok()) + is.setstate(ios::failbit); + if (!is.fail()) + wd = fds.wd; + return is; +} + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, year_month& ym, + std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) +{ + using namespace std; + using namespace std::chrono; + using CT = seconds; + fields fds{}; + from_stream(is, fmt, fds, abbrev, offset); + if (!fds.ymd.month().ok()) + is.setstate(ios::failbit); + if (!is.fail()) + ym = fds.ymd.year()/fds.ymd.month(); + return is; +} + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, month_day& md, + std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) +{ + using namespace std; + using namespace std::chrono; + using CT = seconds; + fields fds{}; + from_stream(is, fmt, fds, abbrev, offset); + if (!fds.ymd.month().ok() || !fds.ymd.day().ok()) + is.setstate(ios::failbit); + if (!is.fail()) + md = fds.ymd.month()/fds.ymd.day(); + return is; +} + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, + year_month_day& ymd, std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) +{ + using namespace std; + using namespace std::chrono; + using CT = seconds; + fields fds{}; + from_stream(is, fmt, fds, abbrev, offset); + if (!fds.ymd.ok()) + is.setstate(ios::failbit); + if (!is.fail()) + ymd = fds.ymd; + return is; +} + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, + sys_time& tp, std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) +{ + using namespace std; + using namespace std::chrono; + using CT = typename common_type::type; + minutes offset_local{}; + auto offptr = offset ? 
offset : &offset_local; + fields fds{}; + from_stream(is, fmt, fds, abbrev, offptr); + if (!fds.ymd.ok() || !fds.tod.in_conventional_range()) + is.setstate(ios::failbit); + if (!is.fail()) + tp = round(sys_days(fds.ymd) - *offptr + fds.tod.to_duration()); + return is; +} + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, + local_time& tp, std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) +{ + using namespace std; + using namespace std::chrono; + using CT = typename common_type::type; + fields fds{}; + from_stream(is, fmt, fds, abbrev, offset); + if (!fds.ymd.ok() || !fds.tod.in_conventional_range()) + is.setstate(ios::failbit); + if (!is.fail()) + tp = round(local_seconds{local_days(fds.ymd)} + fds.tod.to_duration()); + return is; +} + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, + std::chrono::duration& d, + std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) +{ + using namespace std; + using namespace std::chrono; + using Duration = std::chrono::duration; + using CT = typename common_type::type; + fields fds{}; + from_stream(is, fmt, fds, abbrev, offset); + if (!is.fail()) + d = duration_cast(fds.tod.to_duration()); + return is; +} + +template , + class Alloc = std::allocator> +struct parse_manip +{ + const std::basic_string format_; + Parsable& tp_; + std::basic_string* abbrev_; + std::chrono::minutes* offset_; + +public: + parse_manip(std::basic_string format, Parsable& tp, + std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) + : format_(std::move(format)) + , tp_(tp) + , abbrev_(abbrev) + , offset_(offset) + {} + +}; + +template +std::basic_istream& +operator>>(std::basic_istream& is, + const parse_manip& x) +{ + return from_stream(is, x.format_.c_str(), x.tp_, x.abbrev_, x.offset_); +} + +template +inline +auto +parse(const std::basic_string& format, Parsable& tp) + -> decltype(from_stream(std::declval&>(), + format.c_str(), tp), + parse_manip{format, tp}) +{ + return {format, tp}; +} + +template +inline +auto +parse(const std::basic_string& format, Parsable& tp, + std::basic_string& abbrev) + -> decltype(from_stream(std::declval&>(), + format.c_str(), tp, &abbrev), + parse_manip{format, tp, &abbrev}) +{ + return {format, tp, &abbrev}; +} + +template +inline +auto +parse(const std::basic_string& format, Parsable& tp, + std::chrono::minutes& offset) + -> decltype(from_stream(std::declval&>(), + format.c_str(), tp, nullptr, &offset), + parse_manip{format, tp, nullptr, &offset}) +{ + return {format, tp, nullptr, &offset}; +} + +template +inline +auto +parse(const std::basic_string& format, Parsable& tp, + std::basic_string& abbrev, std::chrono::minutes& offset) + -> decltype(from_stream(std::declval&>(), + format.c_str(), tp, &abbrev, &offset), + parse_manip{format, tp, &abbrev, &offset}) +{ + return {format, tp, &abbrev, &offset}; +} + +// const CharT* formats + +template +inline +auto +parse(const CharT* format, Parsable& tp) + -> decltype(from_stream(std::declval&>(), format, tp), + parse_manip{format, tp}) +{ + return {format, tp}; +} + +template +inline +auto +parse(const CharT* format, Parsable& tp, std::basic_string& abbrev) + -> decltype(from_stream(std::declval&>(), format, + tp, &abbrev), + parse_manip{format, tp, &abbrev}) +{ + return {format, tp, &abbrev}; +} + +template +inline +auto +parse(const CharT* format, Parsable& tp, std::chrono::minutes& offset) + -> decltype(from_stream(std::declval&>(), format, + tp, 
nullptr, &offset), + parse_manip{format, tp, nullptr, &offset}) +{ + return {format, tp, nullptr, &offset}; +} + +template +inline +auto +parse(const CharT* format, Parsable& tp, + std::basic_string& abbrev, std::chrono::minutes& offset) + -> decltype(from_stream(std::declval&>(), format, + tp, &abbrev, &offset), + parse_manip{format, tp, &abbrev, &offset}) +{ + return {format, tp, &abbrev, &offset}; +} + +// duration streaming + +namespace detail +{ + +#if __cplusplus >= 201402 && (!defined(__EDG_VERSION__) || __EDG_VERSION__ > 411) \ + && (!defined(__SUNPRO_CC) || __SUNPRO_CC > 0x5150) + +template +class string_literal +{ + CharT p_[N]; + +public: + using const_iterator = const CharT*; + + string_literal(string_literal const&) = default; + string_literal& operator=(string_literal const&) = delete; + + template > + CONSTCD14 string_literal(CharT c) NOEXCEPT + : p_{c} + { + } + + CONSTCD14 string_literal(const CharT(&a)[N]) NOEXCEPT + : p_{} + { + for (std::size_t i = 0; i < N; ++i) + p_[i] = a[i]; + } + + template > + CONSTCD14 string_literal(const char(&a)[N]) NOEXCEPT + : p_{} + { + for (std::size_t i = 0; i < N; ++i) + p_[i] = a[i]; + } + + template {}>> + CONSTCD14 string_literal(string_literal const& a) NOEXCEPT + : p_{} + { + for (std::size_t i = 0; i < N; ++i) + p_[i] = a[i]; + } + + template > + CONSTCD14 string_literal(const string_literal& x, + const string_literal& y) NOEXCEPT + : p_{} + { + std::size_t i = 0; + for (; i < N1-1; ++i) + p_[i] = x[i]; + for (std::size_t j = 0; j < N2; ++j, ++i) + p_[i] = y[j]; + } + + CONSTCD14 const CharT* data() const NOEXCEPT {return p_;} + CONSTCD14 std::size_t size() const NOEXCEPT {return N-1;} + + CONSTCD14 const_iterator begin() const NOEXCEPT {return p_;} + CONSTCD14 const_iterator end() const NOEXCEPT {return p_ + N-1;} + + CONSTCD14 CharT const& operator[](std::size_t n) const NOEXCEPT + { + return p_[n]; + } + + template + friend + std::basic_ostream& + operator<<(std::basic_ostream& os, const string_literal& s) + { + return os << s.p_; + } +}; + +template +CONSTCD14 +inline +string_literal, + N1 + N2 - 1> +operator+(const string_literal& x, const string_literal& y) NOEXCEPT +{ + using CharT = std::conditional_t; + return string_literal{string_literal{x}, + string_literal{y}}; +} + +template +inline +std::basic_string +operator+(std::basic_string x, + const string_literal& y) NOEXCEPT +{ + x.append(y.data(), y.size()); + return x; +} + +template +CONSTCD14 +inline +string_literal +msl(const CharT(&a)[N]) NOEXCEPT +{ + return string_literal{a}; +} + +template {} || + std::is_same{} || + std::is_same{} || + std::is_same{}>> +CONSTCD14 +inline +string_literal +msl(CharT c) NOEXCEPT +{ + return string_literal{c}; +} + +CONSTCD14 +inline +std::size_t +to_string_len(std::intmax_t i) +{ + std::size_t r = 0; + do + { + i /= 10; + ++r; + } while (i > 0); + return r; +} + +template +CONSTCD14 +inline +std::enable_if_t +< + N < 10, + string_literal +> +msl() NOEXCEPT +{ + return msl(char(N % 10 + '0')); +} + +template +CONSTCD14 +inline +std::enable_if_t +< + 10 <= N, + string_literal +> +msl() NOEXCEPT +{ + return msl() + msl(char(N % 10 + '0')); +} + +template +CONSTCD14 +inline +std::enable_if_t +< + std::ratio::type::den != 1, + string_literal::type::num) + + to_string_len(std::ratio::type::den) + 4> +> +msl(std::ratio) NOEXCEPT +{ + using R = typename std::ratio::type; + return msl(CharT{'['}) + msl() + msl(CharT{'/'}) + + msl() + msl(CharT{']'}); +} + +template +CONSTCD14 +inline +std::enable_if_t +< + std::ratio::type::den == 1, + 
string_literal::type::num) + 3> +> +msl(std::ratio) NOEXCEPT +{ + using R = typename std::ratio::type; + return msl(CharT{'['}) + msl() + msl(CharT{']'}); +} + +template +CONSTCD14 +inline +auto +msl(std::atto) NOEXCEPT +{ + return msl(CharT{'a'}); +} + +template +CONSTCD14 +inline +auto +msl(std::femto) NOEXCEPT +{ + return msl(CharT{'f'}); +} + +template +CONSTCD14 +inline +auto +msl(std::pico) NOEXCEPT +{ + return msl(CharT{'p'}); +} + +template +CONSTCD14 +inline +auto +msl(std::nano) NOEXCEPT +{ + return msl(CharT{'n'}); +} + +template +CONSTCD14 +inline +std::enable_if_t +< + std::is_same{}, + string_literal +> +msl(std::micro) NOEXCEPT +{ + return string_literal{"\xC2\xB5"}; +} + +template +CONSTCD14 +inline +std::enable_if_t +< + !std::is_same{}, + string_literal +> +msl(std::micro) NOEXCEPT +{ + return string_literal{CharT{static_cast('\xB5')}}; +} + +template +CONSTCD14 +inline +auto +msl(std::milli) NOEXCEPT +{ + return msl(CharT{'m'}); +} + +template +CONSTCD14 +inline +auto +msl(std::centi) NOEXCEPT +{ + return msl(CharT{'c'}); +} + +template +CONSTCD14 +inline +auto +msl(std::deci) NOEXCEPT +{ + return msl(CharT{'d'}); +} + +template +CONSTCD14 +inline +auto +msl(std::deca) NOEXCEPT +{ + return string_literal{"da"}; +} + +template +CONSTCD14 +inline +auto +msl(std::hecto) NOEXCEPT +{ + return msl(CharT{'h'}); +} + +template +CONSTCD14 +inline +auto +msl(std::kilo) NOEXCEPT +{ + return msl(CharT{'k'}); +} + +template +CONSTCD14 +inline +auto +msl(std::mega) NOEXCEPT +{ + return msl(CharT{'M'}); +} + +template +CONSTCD14 +inline +auto +msl(std::giga) NOEXCEPT +{ + return msl(CharT{'G'}); +} + +template +CONSTCD14 +inline +auto +msl(std::tera) NOEXCEPT +{ + return msl(CharT{'T'}); +} + +template +CONSTCD14 +inline +auto +msl(std::peta) NOEXCEPT +{ + return msl(CharT{'P'}); +} + +template +CONSTCD14 +inline +auto +msl(std::exa) NOEXCEPT +{ + return msl(CharT{'E'}); +} + +template +CONSTCD14 +auto +get_units(Period p) +{ + return msl(p) + string_literal{"s"}; +} + +template +CONSTCD14 +auto +get_units(std::ratio<1>) +{ + return string_literal{"s"}; +} + +template +CONSTCD14 +auto +get_units(std::ratio<60>) +{ + return string_literal{"min"}; +} + +template +CONSTCD14 +auto +get_units(std::ratio<3600>) +{ + return string_literal{"h"}; +} + +#else // __cplusplus < 201402 || (defined(__EDG_VERSION__) && __EDG_VERSION__ <= 411) + +inline +std::string +to_string(std::uint64_t x) +{ + return std::to_string(x); +} + +template +std::basic_string +to_string(std::uint64_t x) +{ + auto y = std::to_string(x); + return std::basic_string(y.begin(), y.end()); +} + +template +inline +typename std::enable_if +< + std::ratio::type::den != 1, + std::basic_string +>::type +msl(std::ratio) +{ + using R = typename std::ratio::type; + return std::basic_string(1, '[') + to_string(R::num) + CharT{'/'} + + to_string(R::den) + CharT{']'}; +} + +template +inline +typename std::enable_if +< + std::ratio::type::den == 1, + std::basic_string +>::type +msl(std::ratio) +{ + using R = typename std::ratio::type; + return std::basic_string(1, '[') + to_string(R::num) + CharT{']'}; +} + +template +inline +std::basic_string +msl(std::atto) +{ + return {'a'}; +} + +template +inline +std::basic_string +msl(std::femto) +{ + return {'f'}; +} + +template +inline +std::basic_string +msl(std::pico) +{ + return {'p'}; +} + +template +inline +std::basic_string +msl(std::nano) +{ + return {'n'}; +} + +template +inline +typename std::enable_if +< + std::is_same::value, + std::string +>::type +msl(std::micro) +{ + return 
"\xC2\xB5"; +} + +template +inline +typename std::enable_if +< + !std::is_same::value, + std::basic_string +>::type +msl(std::micro) +{ + return {CharT(static_cast('\xB5'))}; +} + +template +inline +std::basic_string +msl(std::milli) +{ + return {'m'}; +} + +template +inline +std::basic_string +msl(std::centi) +{ + return {'c'}; +} + +template +inline +std::basic_string +msl(std::deci) +{ + return {'d'}; +} + +template +inline +std::basic_string +msl(std::deca) +{ + return {'d', 'a'}; +} + +template +inline +std::basic_string +msl(std::hecto) +{ + return {'h'}; +} + +template +inline +std::basic_string +msl(std::kilo) +{ + return {'k'}; +} + +template +inline +std::basic_string +msl(std::mega) +{ + return {'M'}; +} + +template +inline +std::basic_string +msl(std::giga) +{ + return {'G'}; +} + +template +inline +std::basic_string +msl(std::tera) +{ + return {'T'}; +} + +template +inline +std::basic_string +msl(std::peta) +{ + return {'P'}; +} + +template +inline +std::basic_string +msl(std::exa) +{ + return {'E'}; +} + +template +std::basic_string +get_units(Period p) +{ + return msl(p) + CharT{'s'}; +} + +template +std::basic_string +get_units(std::ratio<1>) +{ + return {'s'}; +} + +template +std::basic_string +get_units(std::ratio<60>) +{ + return {'m', 'i', 'n'}; +} + +template +std::basic_string +get_units(std::ratio<3600>) +{ + return {'h'}; +} + +#endif // __cplusplus < 201402 || (defined(__EDG_VERSION__) && __EDG_VERSION__ <= 411) + +template > +struct make_string; + +template <> +struct make_string +{ + template + static + std::string + from(Rep n) + { + return std::to_string(n); + } +}; + +template +struct make_string +{ + template + static + std::basic_string + from(Rep n) + { + auto s = std::to_string(n); + return std::basic_string(s.begin(), s.end()); + } +}; + +template <> +struct make_string +{ + template + static + std::wstring + from(Rep n) + { + return std::to_wstring(n); + } +}; + +template +struct make_string +{ + template + static + std::basic_string + from(Rep n) + { + auto s = std::to_wstring(n); + return std::basic_string(s.begin(), s.end()); + } +}; + +} // namespace detail + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, + const std::chrono::duration& d) +{ + using namespace detail; + return os << make_string::from(d.count()) + + get_units(typename Period::type{}); +} + +} // namespace date +} // namespace util +} // namespace arrow + + +#ifdef __GNUC__ +# pragma GCC diagnostic pop +#endif + + +#endif // DATE_H diff --git a/cpp/src/arrow/vendored/datetime/ios.h b/cpp/src/arrow/vendored/datetime/ios.h new file mode 100644 index 0000000000000..23dc1671aa9fb --- /dev/null +++ b/cpp/src/arrow/vendored/datetime/ios.h @@ -0,0 +1,56 @@ +// +// ios.h +// DateTimeLib +// +// The MIT License (MIT) +// +// Copyright (c) 2016 Alexander Kormanovsky +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#ifndef ios_hpp +#define ios_hpp + +#if __APPLE__ +# include +# if TARGET_OS_IPHONE +# include + + namespace arrow + { + namespace util + { + namespace date + { + namespace iOSUtils + { + + std::string get_tzdata_path(); + std::string get_current_timezone(); + + } // namespace iOSUtils + } // namespace date + } // namespace util + } // namespace arrow + +# endif // TARGET_OS_IPHONE +#else // !__APPLE__ +# define TARGET_OS_IPHONE 0 +#endif // !__APPLE__ +#endif // ios_hpp diff --git a/cpp/src/arrow/vendored/datetime/tz.cpp b/cpp/src/arrow/vendored/datetime/tz.cpp new file mode 100644 index 0000000000000..e05423e13c61c --- /dev/null +++ b/cpp/src/arrow/vendored/datetime/tz.cpp @@ -0,0 +1,3797 @@ +// The MIT License (MIT) +// +// Copyright (c) 2015, 2016, 2017 Howard Hinnant +// Copyright (c) 2015 Ville Voutilainen +// Copyright (c) 2016 Alexander Kormanovsky +// Copyright (c) 2016, 2017 Jiangang Zhuang +// Copyright (c) 2017 Nicolas Veloz Savino +// Copyright (c) 2017 Florian Dang +// Copyright (c) 2017 Aaron Bishop +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// +// Our apologies. When the previous paragraph was written, lowercase had not yet +// been invented (that would involve another several millennia of evolution). +// We did not mean to shout. + +// wesm: This is required so that symbols are properly exported from the DLL +#include "visibility.h" + +#ifdef _WIN32 + // windows.h will be included directly and indirectly (e.g. by curl). + // We need to define these macros to prevent windows.h bringing in + // more than we need and do it early so windows.h doesn't get included + // without these macros having been defined. + // min/max macros interfere with the C++ versions. +# ifndef NOMINMAX +# define NOMINMAX +# endif + // We don't need all that Windows has to offer. 
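+ // (WIN32_LEAN_AND_MEAN trims windows.h down by excluding rarely-used
+ // headers such as winsock, DDE and RPC, which also speeds up compilation.)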
+# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +# endif + + // for wcstombs +# ifndef _CRT_SECURE_NO_WARNINGS +# define _CRT_SECURE_NO_WARNINGS +# endif + + // None of this happens with the MS SDK (at least VS14 which I tested), but: + // Compiling with mingw, we get "error: 'KF_FLAG_DEFAULT' was not declared in this scope." + // and error: 'SHGetKnownFolderPath' was not declared in this scope.". + // It seems when using mingw NTDDI_VERSION is undefined and that + // causes KNOWN_FOLDER_FLAG and the KF_ flags to not get defined. + // So we must define NTDDI_VERSION to get those flags on mingw. + // The docs say though here: + // https://msdn.microsoft.com/en-nz/library/windows/desktop/aa383745(v=vs.85).aspx + // that "If you define NTDDI_VERSION, you must also define _WIN32_WINNT." + // So we declare we require Vista or greater. +# ifdef __MINGW32__ + +# ifndef NTDDI_VERSION +# define NTDDI_VERSION 0x06000000 +# define _WIN32_WINNT _WIN32_WINNT_VISTA +# elif NTDDI_VERSION < 0x06000000 +# warning "If this fails to compile NTDDI_VERSION may be to low. See comments above." +# endif + // But once we define the values above we then get this linker error: + // "tz.cpp:(.rdata$.refptr.FOLDERID_Downloads[.refptr.FOLDERID_Downloads]+0x0): " + // "undefined reference to `FOLDERID_Downloads'" + // which #include cures see: + // https://support.microsoft.com/en-us/kb/130869 +# include + // But with included, the error moves on to: + // error: 'FOLDERID_Downloads' was not declared in this scope + // Which #include cures. +# include + +# endif // __MINGW32__ + +# include +#endif // _WIN32 + +#include "tz_private.h" + +#ifdef __APPLE__ +# include "ios.h" +#else +# define TARGET_OS_IPHONE 0 +#endif + +#if USE_OS_TZDB +# include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if USE_OS_TZDB +# include +#endif +#include +#include +#include +#include +#include + +// unistd.h is used on some platforms as part of the the means to get +// the current time zone. On Win32 windows.h provides a means to do it. +// gcc/mingw supports unistd.h on Win32 but MSVC does not. + +#ifdef _WIN32 +# include // _unlink etc. + +# if defined(__clang__) + struct IUnknown; // fix for issue with static_cast<> in objbase.h + // (see https://github.com/philsquared/Catch/issues/690) +# endif + +# include // CoTaskFree, ShGetKnownFolderPath etc. +# if HAS_REMOTE_API +# include // _mkdir +# include // ShFileOperation etc. +# endif // HAS_REMOTE_API +#else // !_WIN32 +# include +# if !USE_OS_TZDB +# include +# endif +# include +# include +# if !USE_SHELL_API +# include +# include +# include +# include +# include +# include +# endif //!USE_SHELL_API +#endif // !_WIN32 + + +#if HAS_REMOTE_API + // Note curl includes windows.h so we must include curl AFTER definitions of things + // that affect windows.h such as NOMINMAX. 
+#if defined(_MSC_VER) && defined(SHORTENED_CURL_INCLUDE) + // For rmt_curl nuget package +# include +#else +# include +#endif +#endif + +#ifdef _WIN32 +static CONSTDATA char folder_delimiter = '\\'; +#else // !_WIN32 +static CONSTDATA char folder_delimiter = '/'; +#endif // !_WIN32 + +#if defined(__GNUC__) && __GNUC__ < 5 + // GCC 4.9 Bug 61489 Wrong warning with -Wmissing-field-initializers +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#endif // defined(__GNUC__) && __GNUC__ < 5 + +#if !USE_OS_TZDB + +# ifdef _WIN32 + +namespace +{ + struct task_mem_deleter + { + void operator()(wchar_t buf[]) + { + if (buf != nullptr) + CoTaskMemFree(buf); + } + }; + using co_task_mem_ptr = std::unique_ptr; +} + +// We might need to know certain locations even if not using the remote API, +// so keep these routines out of that block for now. +static +std::string +get_known_folder(const GUID& folderid) +{ + std::string folder; + PWSTR pfolder = nullptr; + HRESULT hr = SHGetKnownFolderPath(folderid, KF_FLAG_DEFAULT, nullptr, &pfolder); + if (SUCCEEDED(hr)) + { + co_task_mem_ptr folder_ptr(pfolder); + folder = std::string(folder_ptr.get(), folder_ptr.get() + wcslen(folder_ptr.get())); + } + return folder; +} + +// Usually something like "c:\Users\username\Downloads". +static +std::string +get_download_folder() +{ + return get_known_folder(FOLDERID_Downloads); +} + +# else // !_WIN32 + +# if !defined(INSTALL) || HAS_REMOTE_API + +static +std::string +expand_path(std::string path) +{ +# if TARGET_OS_IPHONE + return date::iOSUtils::get_tzdata_path(); +# else // !TARGET_OS_IPHONE + ::wordexp_t w{}; + std::unique_ptr<::wordexp_t, void(*)(::wordexp_t*)> hold{&w, ::wordfree}; + ::wordexp(path.c_str(), &w, 0); + if (w.we_wordc != 1) + throw std::runtime_error("Cannot expand path: " + path); + path = w.we_wordv[0]; + return path; +# endif // !TARGET_OS_IPHONE +} + +static +std::string +get_download_folder() +{ + return expand_path("~/Downloads"); +} + +# endif // !defined(INSTALL) || HAS_REMOTE_API + +# endif // !_WIN32 + +#endif // !USE_OS_TZDB + +namespace arrow +{ +namespace util +{ +namespace date +{ +// +---------------------+ +// | Begin Configuration | +// +---------------------+ + +using namespace detail; + +#if !USE_OS_TZDB + +static +std::string& +access_install() +{ + static std::string install +#ifndef INSTALL + + = get_download_folder() + folder_delimiter + "tzdata"; + +#else // !INSTALL + +# define STRINGIZEIMP(x) #x +# define STRINGIZE(x) STRINGIZEIMP(x) + + = STRINGIZE(INSTALL) + std::string(1, folder_delimiter) + "tzdata"; + + #undef STRINGIZEIMP + #undef STRINGIZE +#endif // !INSTALL + + return install; +} + +void +set_install(const std::string& s) +{ + access_install() = s; +} + +static +const std::string& +get_install() +{ + static const std::string& ref = access_install(); + return ref; +} + +#if HAS_REMOTE_API +static +std::string +get_download_gz_file(const std::string& version) +{ + auto file = get_install() + version + ".tar.gz"; + return file; +} +#endif // HAS_REMOTE_API + +#endif // !USE_OS_TZDB + +// These can be used to reduce the range of the database to save memory +CONSTDATA auto min_year = date::year::min(); +CONSTDATA auto max_year = date::year::max(); + +CONSTDATA auto min_day = date::jan/1; +CONSTDATA auto max_day = date::dec/31; + +#if USE_OS_TZDB + +CONSTCD14 const sys_seconds min_seconds = sys_days(min_year/min_day); + +#endif // USE_OS_TZDB + +#ifndef _WIN32 + +static +std::string +discover_tz_dir() +{ + struct stat sb; + 
using namespace std; +# ifndef __APPLE__ + CONSTDATA auto tz_dir_default = "/usr/share/zoneinfo"; + CONSTDATA auto tz_dir_buildroot = "/usr/share/zoneinfo/uclibc"; + + // Check special path which is valid for buildroot with uclibc builds + if(stat(tz_dir_buildroot, &sb) == 0 && S_ISDIR(sb.st_mode)) + return tz_dir_buildroot; + else if(stat(tz_dir_default, &sb) == 0 && S_ISDIR(sb.st_mode)) + return tz_dir_default; + else + throw runtime_error("discover_tz_dir failed to find zoneinfo\n"); +# else // __APPLE__ +# if TARGET_OS_IPHONE + return "/var/db/timezone/zoneinfo"; +# else + CONSTDATA auto timezone = "/etc/localtime"; + if (!(lstat(timezone, &sb) == 0 && S_ISLNK(sb.st_mode) && sb.st_size > 0)) + throw runtime_error("discover_tz_dir failed\n"); + string result; + char rp[PATH_MAX+1] = {}; + if (readlink(timezone, rp, sizeof(rp)-1) > 0) + result = string(rp); + else + throw system_error(errno, system_category(), "readlink() failed"); + auto i = result.find("zoneinfo"); + if (i == string::npos) + throw runtime_error("discover_tz_dir failed to find zoneinfo\n"); + i = result.find('/', i); + if (i == string::npos) + throw runtime_error("discover_tz_dir failed to find '/'\n"); + return result.substr(0, i); +# endif +# endif // __APPLE__ +} + +static +const std::string& +get_tz_dir() +{ + static const std::string tz_dir = discover_tz_dir(); + return tz_dir; +} + +#endif + +// +-------------------+ +// | End Configuration | +// +-------------------+ + +namespace detail +{ +struct undocumented {explicit undocumented() = default;}; +} + +#ifndef _MSC_VER +static_assert(min_year <= max_year, "Configuration error"); +#endif + +static std::unique_ptr init_tzdb(); + +tzdb_list::~tzdb_list() +{ + const tzdb* ptr = head_; + head_ = nullptr; + while (ptr != nullptr) + { + auto next = ptr->next; + delete ptr; + ptr = next; + } +} + +tzdb_list::tzdb_list(tzdb_list&& x) noexcept + : head_{x.head_.exchange(nullptr)} +{ +} + +void +tzdb_list::push_front(tzdb* tzdb) noexcept +{ + tzdb->next = head_; + head_ = tzdb; +} + +tzdb_list::const_iterator +tzdb_list::erase_after(const_iterator p) noexcept +{ + auto t = p.p_->next; + p.p_->next = p.p_->next->next; + delete t; + return ++p; +} + +struct tzdb_list::undocumented_helper +{ + static void push_front(tzdb_list& db_list, tzdb* tzdb) noexcept + { + db_list.push_front(tzdb); + } +}; + +static +tzdb_list +create_tzdb() +{ + tzdb_list tz_db; + tzdb_list::undocumented_helper::push_front(tz_db, init_tzdb().release()); + return tz_db; +} + +tzdb_list& +get_tzdb_list() +{ + static tzdb_list tz_db = create_tzdb(); + return tz_db; +} + +#if !USE_OS_TZDB + +#ifdef _WIN32 + +static +void +sort_zone_mappings(std::vector& mappings) +{ + std::sort(mappings.begin(), mappings.end(), + [](const date::detail::timezone_mapping& lhs, + const date::detail::timezone_mapping& rhs)->bool + { + auto other_result = lhs.other.compare(rhs.other); + if (other_result < 0) + return true; + else if (other_result == 0) + { + auto territory_result = lhs.territory.compare(rhs.territory); + if (territory_result < 0) + return true; + else if (territory_result == 0) + { + if (lhs.type < rhs.type) + return true; + } + } + return false; + }); +} + +static +bool +native_to_standard_timezone_name(const std::string& native_tz_name, + std::string& standard_tz_name) +{ + // TOOD! Need be a case insensitive compare? + if (native_tz_name == "UTC") + { + standard_tz_name = "Etc/UTC"; + return true; + } + standard_tz_name.clear(); + // TODO! we can improve on linear search. 
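+ // A sketch of one possibility: the mappings are kept sorted by
+ // sort_zone_mappings() with 'other' as the primary key, so a binary
+ // search would do, e.g.:
+ //   auto it = std::lower_bound(mappings.begin(), mappings.end(), native_tz_name,
+ //       [](const date::detail::timezone_mapping& m, const std::string& name)
+ //       { return m.other < name; });
+ //   if (it != mappings.end() && it->other == native_tz_name)
+ //   { standard_tz_name = it->type; return true; }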
+ const auto& mappings = date::get_tzdb().mappings; + for (const auto& tzm : mappings) + { + if (tzm.other == native_tz_name) + { + standard_tz_name = tzm.type; + return true; + } + } + return false; +} + +// Parse this XML file: +// http://unicode.org/repos/cldr/trunk/common/supplemental/windowsZones.xml +// The parsing method is designed to be simple and quick. It is not overly +// forgiving of change but it should diagnose basic format issues. +// See timezone_mapping structure for more info. +static +std::vector +load_timezone_mappings_from_xml_file(const std::string& input_path) +{ + std::size_t line_num = 0; + std::vector mappings; + std::string line; + + std::ifstream is(input_path); + if (!is.is_open()) + { + // We don't emit file exceptions because that's an implementation detail. + std::string msg = "Error opening time zone mapping file \""; + msg += input_path; + msg += "\"."; + throw std::runtime_error(msg); + } + + auto error = [&input_path, &line_num](const char* info) + { + std::string msg = "Error loading time zone mapping file \""; + msg += input_path; + msg += "\" at line "; + msg += std::to_string(line_num); + msg += ": "; + msg += info; + throw std::runtime_error(msg); + }; + // [optional space]a="b" + auto read_attribute = [&line, &error] + (const char* name, std::string& value, std::size_t startPos) + ->std::size_t + { + value.clear(); + // Skip leading space before attribute name. + std::size_t spos = line.find_first_not_of(' ', startPos); + if (spos == std::string::npos) + spos = startPos; + // Assume everything up to next = is the attribute name + // and that an = will always delimit that. + std::size_t epos = line.find('=', spos); + if (epos == std::string::npos) + error("Expected \'=\' right after attribute name."); + std::size_t name_len = epos - spos; + // Expect the name we find matches the name we expect. + if (line.compare(spos, name_len, name) != 0) + { + std::string msg; + msg = "Expected attribute name \'"; + msg += name; + msg += "\' around position "; + msg += std::to_string(spos); + msg += " but found something else."; + error(msg.c_str()); + } + ++epos; // Skip the '=' that is after the attribute name. + spos = epos; + if (spos < line.length() && line[spos] == '\"') + ++spos; // Skip the quote that is before the attribute value. + else + { + std::string msg = "Expected '\"' to begin value of attribute \'"; + msg += name; + msg += "\'."; + error(msg.c_str()); + } + epos = line.find('\"', spos); + if (epos == std::string::npos) + { + std::string msg = "Expected '\"' to end value of attribute \'"; + msg += name; + msg += "\'."; + error(msg.c_str()); + } + // Extract everything in between the quotes. Note no escaping is done. + std::size_t value_len = epos - spos; + value.assign(line, spos, value_len); + ++epos; // Skip the quote that is after the attribute value; + return epos; + }; + + // Quick but not overly forgiving XML mapping file processing. 
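+ // For reference, the portion of windowsZones.xml this parser consumes looks
+ // roughly like:
+ //   <mapTimezones otherVersion="..." typeVersion="...">
+ //     <!-- (UTC-08:00) Pacific Time (US & Canada) -->
+ //     <mapZone other="Pacific Standard Time" territory="001" type="America/Los_Angeles"/>
+ //     ...
+ //   </mapTimezones>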
+ bool mapTimezonesOpenTagFound = false; + bool mapTimezonesCloseTagFound = false; + std::size_t mapZonePos = std::string::npos; + std::size_t mapTimezonesPos = std::string::npos; + CONSTDATA char mapTimeZonesOpeningTag[] = { ""); + mapTimezonesCloseTagFound = (mapTimezonesPos != std::string::npos); + if (!mapTimezonesCloseTagFound) + { + std::size_t commentPos = line.find(" " << x.target_; +} + +// leap + +leap::leap(const std::string& s, detail::undocumented) +{ + using namespace date; + std::istringstream in(s); + in.exceptions(std::ios::failbit | std::ios::badbit); + std::string word; + int y; + MonthDayTime date; + in >> word >> y >> date; + date_ = date.to_time_point(year(y)); +} + +static +bool +file_exists(const std::string& filename) +{ +#ifdef _WIN32 + return ::_access(filename.c_str(), 0) == 0; +#else + return ::access(filename.c_str(), F_OK) == 0; +#endif +} + +#if HAS_REMOTE_API + +// CURL tools + +static +int +curl_global() +{ + if (::curl_global_init(CURL_GLOBAL_DEFAULT) != 0) + throw std::runtime_error("CURL global initialization failed"); + return 0; +} + +namespace +{ + +struct curl_deleter +{ + void operator()(CURL* p) const + { + ::curl_easy_cleanup(p); + } +}; + +} // unnamed namespace + +static +std::unique_ptr +curl_init() +{ + static const auto curl_is_now_initiailized = curl_global(); + (void)curl_is_now_initiailized; + return std::unique_ptr{::curl_easy_init()}; +} + +static +bool +download_to_string(const std::string& url, std::string& str) +{ + str.clear(); + auto curl = curl_init(); + if (!curl) + return false; + std::string version; + curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str()); + curl_write_callback write_cb = [](char* contents, std::size_t size, std::size_t nmemb, + void* userp) -> std::size_t + { + auto& userstr = *static_cast(userp); + auto realsize = size * nmemb; + userstr.append(contents, realsize); + return realsize; + }; + curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, write_cb); + curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &str); + curl_easy_setopt(curl.get(), CURLOPT_SSL_VERIFYPEER, false); + auto res = curl_easy_perform(curl.get()); + return (res == CURLE_OK); +} + +namespace +{ + enum class download_file_options { binary, text }; +} + +static +bool +download_to_file(const std::string& url, const std::string& local_filename, + download_file_options opts) +{ + auto curl = curl_init(); + if (!curl) + return false; + curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str()); + curl_easy_setopt(curl.get(), CURLOPT_SSL_VERIFYPEER, false); + curl_write_callback write_cb = [](char* contents, std::size_t size, std::size_t nmemb, + void* userp) -> std::size_t + { + auto& of = *static_cast(userp); + auto realsize = size * nmemb; + of.write(contents, static_cast(realsize)); + return realsize; + }; + curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, write_cb); + decltype(curl_easy_perform(curl.get())) res; + { + std::ofstream of(local_filename, + opts == download_file_options::binary ? 
+ std::ofstream::out | std::ofstream::binary : + std::ofstream::out); + of.exceptions(std::ios::badbit); + curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &of); + res = curl_easy_perform(curl.get()); + } + return res == CURLE_OK; +} + +std::string +remote_version() +{ + std::string version; + std::string str; + if (download_to_string("https://www.iana.org/time-zones", str)) + { + CONSTDATA char db[] = "/time-zones/releases/tzdata"; + CONSTDATA auto db_size = sizeof(db) - 1; + auto p = str.find(db, 0, db_size); + const int ver_str_len = 5; + if (p != std::string::npos && p + (db_size + ver_str_len) <= str.size()) + version = str.substr(p + db_size, ver_str_len); + } + return version; +} + + +// TODO! Using system() create a process and a console window. +// This is useful to see what errors may occur but is slow and distracting. +// Consider implementing this functionality more directly, such as +// using _mkdir and CreateProcess etc. +// But use the current means now as matches Unix implementations and while +// in proof of concept / testing phase. +// TODO! Use eventually. +static +bool +remove_folder_and_subfolders(const std::string& folder) +{ +# ifdef _WIN32 +# if USE_SHELL_API + // Delete the folder contents by deleting the folder. + std::string cmd = "rd /s /q \""; + cmd += folder; + cmd += '\"'; + return std::system(cmd.c_str()) == EXIT_SUCCESS; +# else // !USE_SHELL_API + // Create a buffer containing the path to delete. It must be terminated + // by two nuls. Who designs these API's... + std::vector from; + from.assign(folder.begin(), folder.end()); + from.push_back('\0'); + from.push_back('\0'); + SHFILEOPSTRUCT fo{}; // Zero initialize. + fo.wFunc = FO_DELETE; + fo.pFrom = from.data(); + fo.fFlags = FOF_NO_UI; + int ret = SHFileOperation(&fo); + if (ret == 0 && !fo.fAnyOperationsAborted) + return true; + return false; +# endif // !USE_SHELL_API +# else // !_WIN32 +# if USE_SHELL_API + return std::system(("rm -R " + folder).c_str()) == EXIT_SUCCESS; +# else // !USE_SHELL_API + struct dir_deleter { + dir_deleter() {} + void operator()(DIR* d) const + { + if (d != nullptr) + { + int result = closedir(d); + assert(result == 0); + } + } + }; + using closedir_ptr = std::unique_ptr; + + std::string filename; + struct stat statbuf; + std::size_t folder_len = folder.length(); + struct dirent* p = nullptr; + + closedir_ptr d(opendir(folder.c_str())); + bool r = d.get() != nullptr; + while (r && (p=readdir(d.get())) != nullptr) + { + if (strcmp(p->d_name, ".") == 0 || strcmp(p->d_name, "..") == 0) + continue; + + // + 2 for path delimiter and nul terminator. + std::size_t buf_len = folder_len + strlen(p->d_name) + 2; + filename.resize(buf_len); + std::size_t path_len = static_cast( + snprintf(&filename[0], buf_len, "%s/%s", folder.c_str(), p->d_name)); + assert(path_len == buf_len - 1); + filename.resize(path_len); + + if (stat(filename.c_str(), &statbuf) == 0) + r = S_ISDIR(statbuf.st_mode) + ? remove_folder_and_subfolders(filename) + : unlink(filename.c_str()) == 0; + } + d.reset(); + + if (r) + r = rmdir(folder.c_str()) == 0; + + return r; +# endif // !USE_SHELL_API +# endif // !_WIN32 +} + +static +bool +make_directory(const std::string& folder) +{ +# ifdef _WIN32 +# if USE_SHELL_API + // Re-create the folder. 
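+ // Build a command like: mkdir "C:\Users\SomeUser\Downloads\tzdata"
+ // (quoted because the folder path may contain spaces).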
+ std::string cmd = "mkdir \""; + cmd += folder; + cmd += '\"'; + return std::system(cmd.c_str()) == EXIT_SUCCESS; +# else // !USE_SHELL_API + return _mkdir(folder.c_str()) == 0; +# endif // !USE_SHELL_API +# else // !_WIN32 +# if USE_SHELL_API + return std::system(("mkdir " + folder).c_str()) == EXIT_SUCCESS; +# else // !USE_SHELL_API + return mkdir(folder.c_str(), 0777) == 0; +# endif // !USE_SHELL_API +# endif // !_WIN32 +} + +static +bool +delete_file(const std::string& file) +{ +# ifdef _WIN32 +# if USE_SHELL_API + std::string cmd = "del \""; + cmd += file; + cmd += '\"'; + return std::system(cmd.c_str()) == 0; +# else // !USE_SHELL_API + return _unlink(file.c_str()) == 0; +# endif // !USE_SHELL_API +# else // !_WIN32 +# if USE_SHELL_API + return std::system(("rm " + file).c_str()) == EXIT_SUCCESS; +# else // !USE_SHELL_API + return unlink(file.c_str()) == 0; +# endif // !USE_SHELL_API +# endif // !_WIN32 +} + +# ifdef _WIN32 + +static +bool +move_file(const std::string& from, const std::string& to) +{ +# if USE_SHELL_API + std::string cmd = "move \""; + cmd += from; + cmd += "\" \""; + cmd += to; + cmd += '\"'; + return std::system(cmd.c_str()) == EXIT_SUCCESS; +# else // !USE_SHELL_API + return !!::MoveFile(from.c_str(), to.c_str()); +# endif // !USE_SHELL_API +} + +// Usually something like "c:\Program Files". +static +std::string +get_program_folder() +{ + return get_known_folder(FOLDERID_ProgramFiles); +} + +// Note folder can and usually does contain spaces. +static +std::string +get_unzip_program() +{ + std::string path; + + // 7-Zip appears to note its location in the registry. + // If that doesn't work, fall through and take a guess, but it will likely be wrong. + HKEY hKey = nullptr; + if (RegOpenKeyExA(HKEY_LOCAL_MACHINE, "SOFTWARE\\7-Zip", 0, KEY_READ, &hKey) == ERROR_SUCCESS) + { + char value_buffer[MAX_PATH + 1]; // fyi 260 at time of writing. + // in/out parameter. Documentation say that size is a count of bytes not chars. + DWORD size = sizeof(value_buffer) - sizeof(value_buffer[0]); + DWORD tzi_type = REG_SZ; + // Testing shows Path key value is "C:\Program Files\7-Zip\" i.e. always with trailing \. + bool got_value = (RegQueryValueExA(hKey, "Path", nullptr, &tzi_type, + reinterpret_cast(value_buffer), &size) == ERROR_SUCCESS); + RegCloseKey(hKey); // Close now incase of throw later. + if (got_value) + { + // Function does not guarantee to null terminate. + value_buffer[size / sizeof(value_buffer[0])] = '\0'; + path = value_buffer; + if (!path.empty()) + { + path += "7z.exe"; + return path; + } + } + } + path += get_program_folder(); + path += folder_delimiter; + path += "7-Zip\\7z.exe"; + return path; +} + +# if !USE_SHELL_API +static +int +run_program(const std::string& command) +{ + STARTUPINFO si{}; + si.cb = sizeof(si); + PROCESS_INFORMATION pi{}; + + // Allegedly CreateProcess overwrites the command line. Ugh. + std::string mutable_command(command); + if (CreateProcess(nullptr, &mutable_command[0], + nullptr, nullptr, FALSE, CREATE_NO_WINDOW, nullptr, nullptr, &si, &pi)) + { + WaitForSingleObject(pi.hProcess, INFINITE); + DWORD exit_code; + bool got_exit_code = !!GetExitCodeProcess(pi.hProcess, &exit_code); + CloseHandle(pi.hProcess); + CloseHandle(pi.hThread); + // Not 100% sure about this still active thing is correct, + // but I'm going with it because I *think* WaitForSingleObject might + // return in some cases without INFINITE-ly waiting. + // But why/wouldn't GetExitCodeProcess return false in that case? 
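+ // Note that STILL_ACTIVE is the value 259, so the check below would also
+ // misreport a child that really did exit with code 259.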
+ if (got_exit_code && exit_code != STILL_ACTIVE) + return static_cast(exit_code); + } + return EXIT_FAILURE; +} +# endif // !USE_SHELL_API + +static +std::string +get_download_tar_file(const std::string& version) +{ + auto file = get_install(); + file += folder_delimiter; + file += "tzdata"; + file += version; + file += ".tar"; + return file; +} + +static +bool +extract_gz_file(const std::string& version, const std::string& gz_file, + const std::string& dest_folder) +{ + auto unzip_prog = get_unzip_program(); + bool unzip_result = false; + // Use the unzip program to extract the tar file from the archive. + + // Aim to create a string like: + // "C:\Program Files\7-Zip\7z.exe" x "C:\Users\SomeUser\Downloads\tzdata2016d.tar.gz" + // -o"C:\Users\SomeUser\Downloads\tzdata" + std::string cmd; + cmd = '\"'; + cmd += unzip_prog; + cmd += "\" x \""; + cmd += gz_file; + cmd += "\" -o\""; + cmd += dest_folder; + cmd += '\"'; + +# if USE_SHELL_API + // When using shelling out with std::system() extra quotes are required around the + // whole command. It's weird but necessary it seems, see: + // http://stackoverflow.com/q/27975969/576911 + + cmd = "\"" + cmd + "\""; + if (std::system(cmd.c_str()) == EXIT_SUCCESS) + unzip_result = true; +# else // !USE_SHELL_API + if (run_program(cmd) == EXIT_SUCCESS) + unzip_result = true; +# endif // !USE_SHELL_API + if (unzip_result) + delete_file(gz_file); + + // Use the unzip program extract the data from the tar file that was + // just extracted from the archive. + auto tar_file = get_download_tar_file(version); + cmd = '\"'; + cmd += unzip_prog; + cmd += "\" x \""; + cmd += tar_file; + cmd += "\" -o\""; + cmd += get_install(); + cmd += '\"'; +# if USE_SHELL_API + cmd = "\"" + cmd + "\""; + if (std::system(cmd.c_str()) == EXIT_SUCCESS) + unzip_result = true; +# else // !USE_SHELL_API + if (run_program(cmd) == EXIT_SUCCESS) + unzip_result = true; +# endif // !USE_SHELL_API + + if (unzip_result) + delete_file(tar_file); + + return unzip_result; +} + +static +std::string +get_download_mapping_file(const std::string& version) +{ + auto file = get_install() + version + "windowsZones.xml"; + return file; +} + +# else // !_WIN32 + +# if !USE_SHELL_API +static +int +run_program(const char* prog, const char*const args[]) +{ + pid_t pid = fork(); + if (pid == -1) // Child failed to start. + return EXIT_FAILURE; + + if (pid != 0) + { + // We are in the parent. Child started. Wait for it. + pid_t ret; + int status; + while ((ret = waitpid(pid, &status, 0)) == -1) + { + if (errno != EINTR) + break; + } + if (ret != -1) + { + if (WIFEXITED(status)) + return WEXITSTATUS(status); + } + printf("Child issues!\n"); + + return EXIT_FAILURE; // Not sure what status of child is. + } + else // We are in the child process. Start the program the parent wants to run. + { + + if (execv(prog, const_cast(args)) == -1) // Does not return. + { + perror("unreachable 0\n"); + _Exit(127); + } + printf("unreachable 2\n"); + } + printf("unreachable 2\n"); + // Unreachable. 
+ assert(false); + exit(EXIT_FAILURE); + return EXIT_FAILURE; +} +# endif // !USE_SHELL_API + +static +bool +extract_gz_file(const std::string&, const std::string& gz_file, const std::string&) +{ +# if USE_SHELL_API + bool unzipped = std::system(("tar -xzf " + gz_file + " -C " + get_install()).c_str()) == EXIT_SUCCESS; +# else // !USE_SHELL_API + const char prog[] = {"/usr/bin/tar"}; + const char*const args[] = + { + prog, "-xzf", gz_file.c_str(), "-C", get_install().c_str(), nullptr + }; + bool unzipped = (run_program(prog, args) == EXIT_SUCCESS); +# endif // !USE_SHELL_API + if (unzipped) + { + delete_file(gz_file); + return true; + } + return false; +} + +# endif // !_WIN32 + +bool +remote_download(const std::string& version) +{ + assert(!version.empty()); + +# ifdef _WIN32 + // Download folder should be always available for Windows +# else // !_WIN32 + // Create download folder if it does not exist on UNIX system + auto download_folder = get_download_folder(); + if (!file_exists(download_folder)) + { + make_directory(download_folder); + } +# endif // _WIN32 + + auto url = "https://data.iana.org/time-zones/releases/tzdata" + version + + ".tar.gz"; + bool result = download_to_file(url, get_download_gz_file(version), + download_file_options::binary); +# ifdef _WIN32 + if (result) + { + auto mapping_file = get_download_mapping_file(version); + result = download_to_file("http://unicode.org/repos/cldr/trunk/common/" + "supplemental/windowsZones.xml", + mapping_file, download_file_options::text); + } +# endif // _WIN32 + return result; +} + +bool +remote_install(const std::string& version) +{ + auto success = false; + assert(!version.empty()); + + std::string install = get_install(); + auto gz_file = get_download_gz_file(version); + if (file_exists(gz_file)) + { + if (file_exists(install)) + remove_folder_and_subfolders(install); + if (make_directory(install)) + { + if (extract_gz_file(version, gz_file, install)) + success = true; +# ifdef _WIN32 + auto mapping_file_source = get_download_mapping_file(version); + auto mapping_file_dest = get_install(); + mapping_file_dest += folder_delimiter; + mapping_file_dest += "windowsZones.xml"; + if (!move_file(mapping_file_source, mapping_file_dest)) + success = false; +# endif // _WIN32 + } + } + return success; +} + +#endif // HAS_REMOTE_API + +static +std::string +get_version(const std::string& path) +{ + std::string version; + std::ifstream infile(path + "version"); + if (infile.is_open()) + { + infile >> version; + if (!infile.fail()) + return version; + } + else + { + infile.open(path + "NEWS"); + while (infile) + { + infile >> version; + if (version == "Release") + { + infile >> version; + return version; + } + } + } + throw std::runtime_error("Unable to get Timezone database version from " + path); +} + +static +std::unique_ptr +init_tzdb() +{ + using namespace date; + const std::string install = get_install(); + const std::string path = install + folder_delimiter; + std::string line; + bool continue_zone = false; + std::unique_ptr db(new tzdb); + +#if AUTO_DOWNLOAD + if (!file_exists(install)) + { + auto rv = remote_version(); + if (!rv.empty() && remote_download(rv)) + { + if (!remote_install(rv)) + { + std::string msg = "Timezone database version \""; + msg += rv; + msg += "\" did not install correctly to \""; + msg += install; + msg += "\""; + throw std::runtime_error(msg); + } + } + if (!file_exists(install)) + { + std::string msg = "Timezone database not found at \""; + msg += install; + msg += "\""; + throw std::runtime_error(msg); + } + 
db->version = get_version(path); + } + else + { + db->version = get_version(path); + auto rv = remote_version(); + if (!rv.empty() && db->version != rv) + { + if (remote_download(rv)) + { + remote_install(rv); + db->version = get_version(path); + } + } + } +#else // !AUTO_DOWNLOAD + if (!file_exists(install)) + { + std::string msg = "Timezone database not found at \""; + msg += install; + msg += "\""; + throw std::runtime_error(msg); + } + db->version = get_version(path); +#endif // !AUTO_DOWNLOAD + + CONSTDATA char*const files[] = + { + "africa", "antarctica", "asia", "australasia", "backward", "etcetera", "europe", + "pacificnew", "northamerica", "southamerica", "systemv", "leapseconds" + }; + + for (const auto& filename : files) + { + std::ifstream infile(path + filename); + while (infile) + { + std::getline(infile, line); + if (!line.empty() && line[0] != '#') + { + std::istringstream in(line); + std::string word; + in >> word; + if (word == "Rule") + { + db->rules.push_back(Rule(line)); + continue_zone = false; + } + else if (word == "Link") + { + db->links.push_back(link(line)); + continue_zone = false; + } + else if (word == "Leap") + { + db->leaps.push_back(leap(line, detail::undocumented{})); + continue_zone = false; + } + else if (word == "Zone") + { + db->zones.push_back(time_zone(line, detail::undocumented{})); + continue_zone = true; + } + else if (line[0] == '\t' && continue_zone) + { + db->zones.back().add(line); + } + else + { + std::cerr << line << '\n'; + } + } + } + } + std::sort(db->rules.begin(), db->rules.end()); + Rule::split_overlaps(db->rules); + std::sort(db->zones.begin(), db->zones.end()); + db->zones.shrink_to_fit(); + std::sort(db->links.begin(), db->links.end()); + db->links.shrink_to_fit(); + std::sort(db->leaps.begin(), db->leaps.end()); + db->leaps.shrink_to_fit(); + +#ifdef _WIN32 + std::string mapping_file = get_install() + folder_delimiter + "windowsZones.xml"; + db->mappings = load_timezone_mappings_from_xml_file(mapping_file); + sort_zone_mappings(db->mappings); +#endif // _WIN32 + + return db; +} + +const tzdb& +reload_tzdb() +{ +#if AUTO_DOWNLOAD + auto const& v = get_tzdb_list().front().version; + if (!v.empty() && v == remote_version()) + return get_tzdb_list().front(); +#endif // AUTO_DOWNLOAD + tzdb_list::undocumented_helper::push_front(get_tzdb_list(), init_tzdb().release()); + return get_tzdb_list().front(); +} + +#endif // !USE_OS_TZDB + +const tzdb& +get_tzdb() +{ + return get_tzdb_list().front(); +} + +const time_zone* +#if HAS_STRING_VIEW +tzdb::locate_zone(std::string_view tz_name) const +#else +tzdb::locate_zone(const std::string& tz_name) const +#endif +{ + auto zi = std::lower_bound(zones.begin(), zones.end(), tz_name, +#if HAS_STRING_VIEW + [](const time_zone& z, const std::string_view& nm) +#else + [](const time_zone& z, const std::string& nm) +#endif + { + return z.name() < nm; + }); + if (zi == zones.end() || zi->name() != tz_name) + { +#if !USE_OS_TZDB + auto li = std::lower_bound(links.begin(), links.end(), tz_name, +#if HAS_STRING_VIEW + [](const link& z, const std::string_view& nm) +#else + [](const link& z, const std::string& nm) +#endif + { + return z.name() < nm; + }); + if (li != links.end() && li->name() == tz_name) + { + zi = std::lower_bound(zones.begin(), zones.end(), li->target(), + [](const time_zone& z, const std::string& nm) + { + return z.name() < nm; + }); + if (zi != zones.end() && zi->name() == li->target()) + return &*zi; + } +#endif // !USE_OS_TZDB + throw std::runtime_error(std::string(tz_name) + " not found 
in timezone database"); + } + return &*zi; +} + +const time_zone* +#if HAS_STRING_VIEW +locate_zone(std::string_view tz_name) +#else +locate_zone(const std::string& tz_name) +#endif +{ + return get_tzdb().locate_zone(tz_name); +} + +#if USE_OS_TZDB + +std::ostream& +operator<<(std::ostream& os, const tzdb& db) +{ + os << "Version: " << db.version << "\n\n"; + for (const auto& x : db.zones) + os << x << '\n'; +#if !MISSING_LEAP_SECONDS + os << '\n'; + for (const auto& x : db.leaps) + os << x << '\n'; +#endif // !MISSING_LEAP_SECONDS + return os; +} + +#else // !USE_OS_TZDB + +std::ostream& +operator<<(std::ostream& os, const tzdb& db) +{ + os << "Version: " << db.version << '\n'; + std::string title("--------------------------------------------" + "--------------------------------------------\n" + "Name ""Start Y ""End Y " + "Beginning ""Offset " + "Designator\n" + "--------------------------------------------" + "--------------------------------------------\n"); + int count = 0; + for (const auto& x : db.rules) + { + if (count++ % 50 == 0) + os << title; + os << x << '\n'; + } + os << '\n'; + title = std::string("---------------------------------------------------------" + "--------------------------------------------------------\n" + "Name ""Offset " + "Rule ""Abrev ""Until\n" + "---------------------------------------------------------" + "--------------------------------------------------------\n"); + count = 0; + for (const auto& x : db.zones) + { + if (count++ % 10 == 0) + os << title; + os << x << '\n'; + } + os << '\n'; + title = std::string("---------------------------------------------------------" + "--------------------------------------------------------\n" + "Alias ""To\n" + "---------------------------------------------------------" + "--------------------------------------------------------\n"); + count = 0; + for (const auto& x : db.links) + { + if (count++ % 45 == 0) + os << title; + os << x << '\n'; + } + os << '\n'; + title = std::string("---------------------------------------------------------" + "--------------------------------------------------------\n" + "Leap second on\n" + "---------------------------------------------------------" + "--------------------------------------------------------\n"); + os << title; + for (const auto& x : db.leaps) + os << x << '\n'; + return os; +} + +#endif // !USE_OS_TZDB + +// ----------------------- + +#ifdef _WIN32 + +static +std::string +getTimeZoneKeyName() +{ + DYNAMIC_TIME_ZONE_INFORMATION dtzi{}; + auto result = GetDynamicTimeZoneInformation(&dtzi); + if (result == TIME_ZONE_ID_INVALID) + throw std::runtime_error("current_zone(): GetDynamicTimeZoneInformation()" + " reported TIME_ZONE_ID_INVALID."); + auto wlen = wcslen(dtzi.TimeZoneKeyName); + char buf[128] = {}; + assert(sizeof(buf) >= wlen+1); + wcstombs(buf, dtzi.TimeZoneKeyName, wlen); + if (strcmp(buf, "Coordinated Universal Time") == 0) + return "UTC"; + return buf; +} + +const time_zone* +tzdb::current_zone() const +{ + std::string win_tzid = getTimeZoneKeyName(); + std::string standard_tzid; + if (!native_to_standard_timezone_name(win_tzid, standard_tzid)) + { + std::string msg; + msg = "current_zone() failed: A mapping from the Windows Time Zone id \""; + msg += win_tzid; + msg += "\" was not found in the time zone mapping database."; + throw std::runtime_error(msg); + } + return locate_zone(standard_tzid); +} + +#else // !_WIN32 + +const time_zone* +tzdb::current_zone() const +{ + // On some OS's a file called /etc/localtime may + // exist and it may be either a 
real file + // containing time zone details or a symlink to such a file. + // On MacOS and BSD Unix if this file is a symlink it + // might resolve to a path like this: + // "/usr/share/zoneinfo/America/Los_Angeles" + // If it does, we try to determine the current + // timezone from the remainder of the path by removing the prefix + // and hoping the rest resolves to a valid timezone. + // It may not always work though. If it doesn't then an + // exception will be thrown by local_timezone. + // The path may also take a relative form: + // "../usr/share/zoneinfo/America/Los_Angeles". + { + struct stat sb; + CONSTDATA auto timezone = "/etc/localtime"; + if (lstat(timezone, &sb) == 0 && S_ISLNK(sb.st_mode) && sb.st_size > 0) { + using namespace std; + string result; + char rp[PATH_MAX+1] = {}; + if (readlink(timezone, rp, sizeof(rp)-1) > 0) + result = string(rp); + else + throw system_error(errno, system_category(), "readlink() failed"); + + const size_t pos = result.find(get_tz_dir()); + if (pos != result.npos) + result.erase(0, get_tz_dir().size() + 1 + pos); + return locate_zone(result); + } + } + // On embedded systems e.g. buildroot with uclibc the timezone is linked + // into /etc/TZ which is a symlink to path like this: + // "/usr/share/zoneinfo/uclibc/America/Los_Angeles" + // If it does, we try to determine the current + // timezone from the remainder of the path by removing the prefix + // and hoping the rest resolves to valid timezone. + // It may not always work though. If it doesn't then an + // exception will be thrown by local_timezone. + // The path may also take a relative form: + // "../usr/share/zoneinfo/uclibc/America/Los_Angeles". + { + struct stat sb; + CONSTDATA auto timezone = "/etc/TZ"; + if (lstat(timezone, &sb) == 0 && S_ISLNK(sb.st_mode) && sb.st_size > 0) { + using namespace std; + string result; + char rp[PATH_MAX+1] = {}; + if (readlink(timezone, rp, sizeof(rp)-1) > 0) + result = string(rp); + else + throw system_error(errno, system_category(), "readlink() failed"); + + const size_t pos = result.find(get_tz_dir()); + if (pos != result.npos) + result.erase(0, get_tz_dir().size() + 1 + pos); + return locate_zone(result); + } + } + { + // On some versions of some linux distro's (e.g. Ubuntu), + // the current timezone might be in the first line of + // the /etc/timezone file. + std::ifstream timezone_file("/etc/timezone"); + if (timezone_file.is_open()) + { + std::string result; + std::getline(timezone_file, result); + if (!result.empty()) + return locate_zone(result); + } + // Fall through to try other means. + } + { + // On some versions of some bsd distro's (e.g. FreeBSD), + // the current timezone might be in the first line of + // the /var/db/zoneinfo file. + std::ifstream timezone_file("/var/db/zoneinfo"); + if (timezone_file.is_open()) + { + std::string result; + std::getline(timezone_file, result); + if (!result.empty()) + return locate_zone(result); + } + // Fall through to try other means. + } + { + // On some versions of some bsd distro's (e.g. iOS), + // it is not possible to use file based approach, + // we switch to system API, calling functions in + // CoreFoundation framework. +#if TARGET_OS_IPHONE + std::string result = date::iOSUtils::get_current_timezone(); + if (!result.empty()) + return locate_zone(result); +#endif + // Fall through to try other means. + } + { + // On some versions of some linux distro's (e.g. 
Red Hat),
+        // the current timezone might be in the first line of
+        // the /etc/sysconfig/clock file as:
+        //      ZONE="US/Eastern"
+        std::ifstream timezone_file("/etc/sysconfig/clock");
+        std::string result;
+        while (timezone_file)
+        {
+            std::getline(timezone_file, result);
+            auto p = result.find("ZONE=\"");
+            if (p != std::string::npos)
+            {
+                result.erase(0, p+6);
+                result.erase(result.rfind('"'));
+                return locate_zone(result);
+            }
+        }
+        // Fall through to try other means.
+    }
+    throw std::runtime_error("Could not get current timezone");
+}
+
+#endif // !_WIN32
+
+const time_zone*
+current_zone()
+{
+    return get_tzdb().current_zone();
+}
+
+} // namespace date
+} // namespace util
+} // namespace arrow
+
+#if defined(__GNUC__) && __GNUC__ < 5
+# pragma GCC diagnostic pop
+#endif
diff --git a/cpp/src/arrow/vendored/datetime/tz.h b/cpp/src/arrow/vendored/datetime/tz.h
new file mode 100644
index 0000000000000..db78b2df971d2
--- /dev/null
+++ b/cpp/src/arrow/vendored/datetime/tz.h
@@ -0,0 +1,2593 @@
+#ifndef TZ_H
+#define TZ_H
+
+// The MIT License (MIT)
+//
+// Copyright (c) 2015, 2016, 2017 Howard Hinnant
+// Copyright (c) 2017 Jiangang Zhuang
+// Copyright (c) 2017 Aaron Bishop
+// Copyright (c) 2017 Tomasz Kamiński
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+// Our apologies. When the previous paragraph was written, lowercase had not yet
+// been invented (that would involve another several millennia of evolution).
+// We did not mean to shout.
+
+// Get more recent database at http://www.iana.org/time-zones
+
+// The notion of "current timezone" is something the operating system is expected to "just
+// know". How it knows this is system specific. It's often a value set by the user at OS
+// installation time and recorded by the OS somewhere. On Linux and Mac systems the current
+// timezone name is obtained by looking at the name or contents of a particular file on
+// disk. On Windows the current timezone name comes from the registry. In either method,
+// there is no guarantee that the "native" current timezone name obtained will match any
+// of the "Standard" names in this library's "database". On Linux, the names usually do
+// seem to match so mapping functions to map from native to "Standard" are typically not
+// required. On Windows, the names are never "Standard" so mapping is always required.
+// Technically any OS may use the mapping process but currently only Windows does use it.
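
For orientation, a minimal usage sketch of the API this header vendors. A sketch only: it assumes the headers are reachable under the arrow/vendored/datetime/ path added above, and that a timezone database is available at runtime.

#include <chrono>
#include <iostream>

#include "arrow/vendored/datetime/tz.h"

int main()
{
    using namespace arrow::util::date;
    // Resolve a zone by IANA name; throws std::runtime_error if the
    // name is not in the database.
    const time_zone* la = locate_zone("America/Los_Angeles");
    // Pair a system_clock time point with that zone and print local time.
    auto now = std::chrono::time_point_cast<std::chrono::seconds>(
        std::chrono::system_clock::now());
    zoned_time<std::chrono::seconds> zt{la, now};
    std::cout << zt << '\n';                      // e.g. 2018-12-14 09:30:00 PST
    // current_zone() performs the OS-specific lookup described above.
    std::cout << current_zone()->name() << '\n';  // e.g. America/Los_Angeles
    return 0;
}
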
+ +/////////////////////////////////////////////////// + +// Windows does not support OS timezone database +#ifdef _WIN32 +# define USE_OS_TZDB 0 +#else +# define USE_OS_TZDB 1 +#endif +#define HAS_REMOTE_API 0 + +//////////////////////////////////////////////////// + +#ifndef USE_OS_TZDB +# define USE_OS_TZDB 0 +#endif + +#ifndef HAS_REMOTE_API +# if USE_OS_TZDB == 0 +# ifdef _WIN32 +# define HAS_REMOTE_API 0 +# else +# define HAS_REMOTE_API 1 +# endif +# else // HAS_REMOTE_API makes no since when using the OS timezone database +# define HAS_REMOTE_API 0 +# endif +#endif + +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wconstant-logical-operand" +#endif + +static_assert(!(USE_OS_TZDB && HAS_REMOTE_API), + "USE_OS_TZDB and HAS_REMOTE_API can not be used together"); + +#ifdef __clang__ +# pragma clang diagnostic pop +#endif + +#ifndef AUTO_DOWNLOAD +# define AUTO_DOWNLOAD HAS_REMOTE_API +#endif + +static_assert(HAS_REMOTE_API == 0 ? AUTO_DOWNLOAD == 0 : true, + "AUTO_DOWNLOAD can not be turned on without HAS_REMOTE_API"); + +#ifndef USE_SHELL_API +# define USE_SHELL_API 1 +#endif + +#if USE_OS_TZDB +# ifdef _WIN32 +# error "USE_OS_TZDB can not be used on Windows" +# endif +# ifndef MISSING_LEAP_SECONDS +# ifdef __APPLE__ +# define MISSING_LEAP_SECONDS 1 +# else +# define MISSING_LEAP_SECONDS 0 +# endif +# endif +#else +# define MISSING_LEAP_SECONDS 0 +#endif + +#ifndef HAS_DEDUCTION_GUIDES +# if __cplusplus >= 201703 +# define HAS_DEDUCTION_GUIDES 1 +# else +# define HAS_DEDUCTION_GUIDES 0 +# endif +#endif // HAS_DEDUCTION_GUIDES + +#include "date.h" + +#if defined(_MSC_VER) && (_MSC_VER < 1900) +#include "tz_private.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +# ifdef DATE_BUILD_DLL +# define DATE_API __declspec(dllexport) +# elif defined(DATE_USE_DLL) +# define DATE_API __declspec(dllimport) +# else +# define DATE_API +# endif +#else +# ifdef DATE_BUILD_DLL +# define DATE_API __attribute__ ((visibility ("default"))) +# else +# define DATE_API +# endif +#endif + +namespace arrow +{ +namespace util +{ +namespace date +{ + +enum class choose {earliest, latest}; + +namespace detail +{ + struct undocumented; +} + +struct sys_info +{ + sys_seconds begin; + sys_seconds end; + std::chrono::seconds offset; + std::chrono::minutes save; + std::string abbrev; +}; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const sys_info& r) +{ + os << r.begin << '\n'; + os << r.end << '\n'; + os << make_time(r.offset) << "\n"; + os << make_time(r.save) << "\n"; + os << r.abbrev << '\n'; + return os; +} + +struct local_info +{ + enum {unique, nonexistent, ambiguous} result; + sys_info first; + sys_info second; +}; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const local_info& r) +{ + if (r.result == local_info::nonexistent) + os << "nonexistent between\n"; + else if (r.result == local_info::ambiguous) + os << "ambiguous between\n"; + os << r.first; + if (r.result != local_info::unique) + { + os << "and\n"; + os << r.second; + } + return os; +} + +class nonexistent_local_time + : public std::runtime_error +{ +public: + template + nonexistent_local_time(local_time tp, const local_info& i); + +private: + template + static + std::string + make_msg(local_time tp, const local_info& i); +}; + +template +inline +nonexistent_local_time::nonexistent_local_time(local_time tp, + const local_info& i) + : 
std::runtime_error(make_msg(tp, i)) +{ +} + +template +std::string +nonexistent_local_time::make_msg(local_time tp, const local_info& i) +{ + assert(i.result == local_info::nonexistent); + std::ostringstream os; + os << tp << " is in a gap between\n" + << local_seconds{i.first.end.time_since_epoch()} + i.first.offset << ' ' + << i.first.abbrev << " and\n" + << local_seconds{i.second.begin.time_since_epoch()} + i.second.offset << ' ' + << i.second.abbrev + << " which are both equivalent to\n" + << i.first.end << " UTC"; + return os.str(); +} + +class ambiguous_local_time + : public std::runtime_error +{ +public: + template + ambiguous_local_time(local_time tp, const local_info& i); + +private: + template + static + std::string + make_msg(local_time tp, const local_info& i); +}; + +template +inline +ambiguous_local_time::ambiguous_local_time(local_time tp, const local_info& i) + : std::runtime_error(make_msg(tp, i)) +{ +} + +template +std::string +ambiguous_local_time::make_msg(local_time tp, const local_info& i) +{ + assert(i.result == local_info::ambiguous); + std::ostringstream os; + os << tp << " is ambiguous. It could be\n" + << tp << ' ' << i.first.abbrev << " == " + << tp - i.first.offset << " UTC or\n" + << tp << ' ' << i.second.abbrev << " == " + << tp - i.second.offset << " UTC"; + return os.str(); +} + +class time_zone; + +#if HAS_STRING_VIEW +DATE_API const time_zone* locate_zone(std::string_view tz_name); +#else +DATE_API const time_zone* locate_zone(const std::string& tz_name); +#endif + +DATE_API const time_zone* current_zone(); + +template +struct zoned_traits +{ +}; + +template <> +struct zoned_traits +{ + static + const time_zone* + default_zone() + { + return date::locate_zone("Etc/UTC"); + } + +#if HAS_STRING_VIEW + + static + const time_zone* + locate_zone(std::string_view name) + { + return date::locate_zone(name); + } + +#else // !HAS_STRING_VIEW + + static + const time_zone* + locate_zone(const std::string& name) + { + return date::locate_zone(name); + } + + static + const time_zone* + locate_zone(const char* name) + { + return date::locate_zone(name); + } + +#endif // !HAS_STRING_VIEW +}; + +template +class zoned_time; + +template +bool +operator==(const zoned_time& x, + const zoned_time& y); + +template +class zoned_time +{ +public: + using duration = typename std::common_type::type; + +private: + TimeZonePtr zone_; + sys_time tp_; + +public: +#if !defined(_MSC_VER) || (_MSC_VER > 1900) + template ::default_zone())> +#endif + zoned_time(); + +#if !defined(_MSC_VER) || (_MSC_VER > 1900) + template ::default_zone())> +#endif + zoned_time(const sys_time& st); + explicit zoned_time(TimeZonePtr z); + +#if HAS_STRING_VIEW + template ::locate_zone(std::string_view())) + >::value + >::type> + explicit zoned_time(std::string_view name); +#else +#if !defined(_MSC_VER) || (_MSC_VER > 1900) + template ::locate_zone(std::string())) + >::value + >::type> +#endif + explicit zoned_time(const std::string& name); +#endif + + template , + sys_time>::value + >::type> + zoned_time(const zoned_time& zt) NOEXCEPT; + + zoned_time(TimeZonePtr z, const sys_time& st); + +#if !defined(_MSC_VER) || (_MSC_VER > 1900) + template ()->to_sys(local_time{})), + sys_time + >::value + >::type> +#endif + zoned_time(TimeZonePtr z, const local_time& tp); + +#if !defined(_MSC_VER) || (_MSC_VER > 1900) + template ()->to_sys(local_time{}, + choose::earliest)), + sys_time + >::value + >::type> +#endif + zoned_time(TimeZonePtr z, const local_time& tp, choose c); + + template , + sys_time>::value + >::type> + 
zoned_time(TimeZonePtr z, const zoned_time& zt); + + template , + sys_time>::value + >::type> + zoned_time(TimeZonePtr z, const zoned_time& zt, choose); + +#if HAS_STRING_VIEW + + template ::locate_zone(std::string_view())), + sys_time + >::value + >::type> + zoned_time(std::string_view name, const sys_time& st); + + template ::locate_zone(std::string_view())), + local_time + >::value + >::type> + zoned_time(std::string_view name, const local_time& tp); + + template ::locate_zone(std::string_view())), + local_time, + choose + >::value + >::type> + zoned_time(std::string_view name, const local_time& tp, choose c); + + template ::locate_zone(std::string_view())), + zoned_time + >::value + >::type> + zoned_time(std::string_view name, const zoned_time& zt); + + template ::locate_zone(std::string_view())), + zoned_time, + choose + >::value + >::type> + zoned_time(std::string_view name, const zoned_time& zt, choose); + +#else // !HAS_STRING_VIEW + +#if !defined(_MSC_VER) || (_MSC_VER > 1900) + template ::locate_zone(std::string())), + sys_time + >::value + >::type> +#endif + zoned_time(const std::string& name, const sys_time& st); + +#if !defined(_MSC_VER) || (_MSC_VER > 1900) + template ::locate_zone(std::string())), + sys_time + >::value + >::type> +#endif + zoned_time(const char* name, const sys_time& st); + +#if !defined(_MSC_VER) || (_MSC_VER > 1900) + template ::locate_zone(std::string())), + local_time + >::value + >::type> +#endif + zoned_time(const std::string& name, const local_time& tp); + +#if !defined(_MSC_VER) || (_MSC_VER > 1900) + template ::locate_zone(std::string())), + local_time + >::value + >::type> +#endif + zoned_time(const char* name, const local_time& tp); + +#if !defined(_MSC_VER) || (_MSC_VER > 1900) + template ::locate_zone(std::string())), + local_time, + choose + >::value + >::type> +#endif + zoned_time(const std::string& name, const local_time& tp, choose c); + +#if !defined(_MSC_VER) || (_MSC_VER > 1900) + template ::locate_zone(std::string())), + local_time, + choose + >::value + >::type> +#endif + zoned_time(const char* name, const local_time& tp, choose c); + +#if !defined(_MSC_VER) || (_MSC_VER > 1900) + template ::locate_zone(std::string())), + zoned_time + >::value + >::type> +#endif + zoned_time(const std::string& name, const zoned_time& zt); + +#if !defined(_MSC_VER) || (_MSC_VER > 1900) + template ::locate_zone(std::string())), + zoned_time + >::value + >::type> +#endif + zoned_time(const char* name, const zoned_time& zt); + +#if !defined(_MSC_VER) || (_MSC_VER > 1900) + template ::locate_zone(std::string())), + zoned_time, + choose + >::value + >::type> +#endif + zoned_time(const std::string& name, const zoned_time& zt, choose); + +#if !defined(_MSC_VER) || (_MSC_VER > 1900) + template ::locate_zone(std::string())), + zoned_time, + choose + >::value + >::type> +#endif + zoned_time(const char* name, const zoned_time& zt, choose); + +#endif // !HAS_STRING_VIEW + + zoned_time& operator=(const sys_time& st); + zoned_time& operator=(const local_time& ut); + + explicit operator sys_time() const; + explicit operator local_time() const; + + TimeZonePtr get_time_zone() const; + local_time get_local_time() const; + sys_time get_sys_time() const; + sys_info get_info() const; + + template + friend + bool + operator==(const zoned_time& x, + const zoned_time& y); + + template + friend + std::basic_ostream& + operator<<(std::basic_ostream& os, + const zoned_time& t); + +private: + template friend class zoned_time; +}; + +using zoned_seconds = zoned_time; + +#if 
HAS_DEDUCTION_GUIDES + +zoned_time() + -> zoned_time; + +template +zoned_time(sys_time) + -> zoned_time>; + +template +zoned_time(TimeZonePtr) + -> zoned_time; + +template +zoned_time(TimeZonePtr, sys_time) + -> zoned_time, TimeZonePtr>; + +template +zoned_time(TimeZonePtr, local_time, choose = choose::earliest) + -> zoned_time, TimeZonePtr>; + +#if HAS_STRING_VIEW + +zoned_time(std::string_view) + -> zoned_time; + +template +zoned_time(std::string_view, sys_time) + -> zoned_time>; + +template +zoned_time(std::string_view, local_time, choose = choose::earliest) + -> zoned_time>; + +#else // !HAS_STRING_VIEW + +zoned_time(std::string) + -> zoned_time; + +template +zoned_time(std::string, sys_time) + -> zoned_time>; + +template +zoned_time(std::string, local_time, choose = choose::earliest) + -> zoned_time>; + +#endif // !HAS_STRING_VIEW + +template +zoned_time(const char*, sys_time) + -> zoned_time>; + +template +zoned_time(const char*, local_time, choose = choose::earliest) + -> zoned_time>; + +template +zoned_time(TimeZonePtr, zoned_time, choose = choose::earliest) + -> zoned_time; + +#endif // HAS_DEDUCTION_GUIDES + +template +inline +bool +operator==(const zoned_time& x, + const zoned_time& y) +{ + return x.zone_ == y.zone_ && x.tp_ == y.tp_; +} + +template +inline +bool +operator!=(const zoned_time& x, + const zoned_time& y) +{ + return !(x == y); +} + +#if !defined(_MSC_VER) || (_MSC_VER >= 1900) + +namespace detail +{ +# if USE_OS_TZDB + struct transition; + struct expanded_ttinfo; +# else // !USE_OS_TZDB + struct zonelet; + class Rule; +# endif // !USE_OS_TZDB +} + +#endif // !defined(_MSC_VER) || (_MSC_VER >= 1900) + +class time_zone +{ +private: + std::string name_; +#if USE_OS_TZDB + std::vector transitions_; + std::vector ttinfos_; +#else // !USE_OS_TZDB + std::vector zonelets_; +#endif // !USE_OS_TZDB + std::unique_ptr adjusted_; + +public: +#if !defined(_MSC_VER) || (_MSC_VER >= 1900) + time_zone(time_zone&&) = default; + time_zone& operator=(time_zone&&) = default; +#else // defined(_MSC_VER) && (_MSC_VER < 1900) + time_zone(time_zone&& src); + time_zone& operator=(time_zone&& src); +#endif // defined(_MSC_VER) && (_MSC_VER < 1900) + + DATE_API explicit time_zone(const std::string& s, detail::undocumented); + + const std::string& name() const NOEXCEPT; + + template sys_info get_info(sys_time st) const; + template local_info get_info(local_time tp) const; + + template + sys_time::type> + to_sys(local_time tp) const; + + template + sys_time::type> + to_sys(local_time tp, choose z) const; + + template + local_time::type> + to_local(sys_time tp) const; + + friend bool operator==(const time_zone& x, const time_zone& y) NOEXCEPT; + friend bool operator< (const time_zone& x, const time_zone& y) NOEXCEPT; + friend DATE_API std::ostream& operator<<(std::ostream& os, const time_zone& z); + +#if !USE_OS_TZDB + DATE_API void add(const std::string& s); +#endif // !USE_OS_TZDB + +private: + DATE_API sys_info get_info_impl(sys_seconds tp) const; + DATE_API local_info get_info_impl(local_seconds tp) const; + + template + sys_time::type> + to_sys_impl(local_time tp, choose z, std::false_type) const; + template + sys_time::type> + to_sys_impl(local_time tp, choose, std::true_type) const; + +#if USE_OS_TZDB + DATE_API void init() const; + DATE_API void init_impl(); + DATE_API sys_info + load_sys_info(std::vector::const_iterator i) const; + + template + DATE_API void + load_data(std::istream& inf, std::int32_t tzh_leapcnt, std::int32_t tzh_timecnt, + std::int32_t tzh_typecnt, std::int32_t 
tzh_charcnt); +#else // !USE_OS_TZDB + DATE_API sys_info get_info_impl(sys_seconds tp, int timezone) const; + DATE_API void adjust_infos(const std::vector& rules); + DATE_API void parse_info(std::istream& in); +#endif // !USE_OS_TZDB +}; + +#if defined(_MSC_VER) && (_MSC_VER < 1900) + +inline +time_zone::time_zone(time_zone&& src) + : name_(std::move(src.name_)) + , zonelets_(std::move(src.zonelets_)) + , adjusted_(std::move(src.adjusted_)) + {} + +inline +time_zone& +time_zone::operator=(time_zone&& src) +{ + name_ = std::move(src.name_); + zonelets_ = std::move(src.zonelets_); + adjusted_ = std::move(src.adjusted_); + return *this; +} + +#endif // defined(_MSC_VER) && (_MSC_VER < 1900) + +inline +const std::string& +time_zone::name() const NOEXCEPT +{ + return name_; +} + +template +inline +sys_info +time_zone::get_info(sys_time st) const +{ + using namespace std::chrono; + return get_info_impl(date::floor(st)); +} + +template +inline +local_info +time_zone::get_info(local_time tp) const +{ + using namespace std::chrono; + return get_info_impl(date::floor(tp)); +} + +template +inline +sys_time::type> +time_zone::to_sys(local_time tp) const +{ + return to_sys_impl(tp, choose{}, std::true_type{}); +} + +template +inline +sys_time::type> +time_zone::to_sys(local_time tp, choose z) const +{ + return to_sys_impl(tp, z, std::false_type{}); +} + +template +inline +local_time::type> +time_zone::to_local(sys_time tp) const +{ + using LT = local_time::type>; + auto i = get_info(tp); + return LT{(tp + i.offset).time_since_epoch()}; +} + +inline bool operator==(const time_zone& x, const time_zone& y) NOEXCEPT {return x.name_ == y.name_;} +inline bool operator< (const time_zone& x, const time_zone& y) NOEXCEPT {return x.name_ < y.name_;} + +inline bool operator!=(const time_zone& x, const time_zone& y) NOEXCEPT {return !(x == y);} +inline bool operator> (const time_zone& x, const time_zone& y) NOEXCEPT {return y < x;} +inline bool operator<=(const time_zone& x, const time_zone& y) NOEXCEPT {return !(y < x);} +inline bool operator>=(const time_zone& x, const time_zone& y) NOEXCEPT {return !(x < y);} + +template +sys_time::type> +time_zone::to_sys_impl(local_time tp, choose z, std::false_type) const +{ + using namespace date; + using namespace std::chrono; + auto i = get_info(tp); + if (i.result == local_info::nonexistent) + { + return i.first.end; + } + else if (i.result == local_info::ambiguous) + { + if (z == choose::latest) + return sys_time{tp.time_since_epoch()} - i.second.offset; + } + return sys_time{tp.time_since_epoch()} - i.first.offset; +} + +template +sys_time::type> +time_zone::to_sys_impl(local_time tp, choose, std::true_type) const +{ + using namespace date; + using namespace std::chrono; + auto i = get_info(tp); + if (i.result == local_info::nonexistent) + throw nonexistent_local_time(tp, i); + else if (i.result == local_info::ambiguous) + throw ambiguous_local_time(tp, i); + return sys_time{tp.time_since_epoch()} - i.first.offset; +} + +#if !USE_OS_TZDB + +class link +{ +private: + std::string name_; + std::string target_; +public: + DATE_API explicit link(const std::string& s); + + const std::string& name() const {return name_;} + const std::string& target() const {return target_;} + + friend bool operator==(const link& x, const link& y) {return x.name_ == y.name_;} + friend bool operator< (const link& x, const link& y) {return x.name_ < y.name_;} + + friend DATE_API std::ostream& operator<<(std::ostream& os, const link& x); +}; + +inline bool operator!=(const link& x, const link& y) 
{return !(x == y);} +inline bool operator> (const link& x, const link& y) {return y < x;} +inline bool operator<=(const link& x, const link& y) {return !(y < x);} +inline bool operator>=(const link& x, const link& y) {return !(x < y);} + +#endif // !USE_OS_TZDB + +#if !MISSING_LEAP_SECONDS + +class leap +{ +private: + sys_seconds date_; + +public: +#if USE_OS_TZDB + DATE_API explicit leap(const sys_seconds& s, detail::undocumented); +#else + DATE_API explicit leap(const std::string& s, detail::undocumented); +#endif + + sys_seconds date() const {return date_;} + + friend bool operator==(const leap& x, const leap& y) {return x.date_ == y.date_;} + friend bool operator< (const leap& x, const leap& y) {return x.date_ < y.date_;} + + template + friend + bool + operator==(const leap& x, const sys_time& y) + { + return x.date_ == y; + } + + template + friend + bool + operator< (const leap& x, const sys_time& y) + { + return x.date_ < y; + } + + template + friend + bool + operator< (const sys_time& x, const leap& y) + { + return x < y.date_; + } + + friend DATE_API std::ostream& operator<<(std::ostream& os, const leap& x); +}; + +inline bool operator!=(const leap& x, const leap& y) {return !(x == y);} +inline bool operator> (const leap& x, const leap& y) {return y < x;} +inline bool operator<=(const leap& x, const leap& y) {return !(y < x);} +inline bool operator>=(const leap& x, const leap& y) {return !(x < y);} + +template +inline +bool +operator==(const sys_time& x, const leap& y) +{ + return y == x; +} + +template +inline +bool +operator!=(const leap& x, const sys_time& y) +{ + return !(x == y); +} + +template +inline +bool +operator!=(const sys_time& x, const leap& y) +{ + return !(x == y); +} + +template +inline +bool +operator> (const leap& x, const sys_time& y) +{ + return y < x; +} + +template +inline +bool +operator> (const sys_time& x, const leap& y) +{ + return y < x; +} + +template +inline +bool +operator<=(const leap& x, const sys_time& y) +{ + return !(y < x); +} + +template +inline +bool +operator<=(const sys_time& x, const leap& y) +{ + return !(y < x); +} + +template +inline +bool +operator>=(const leap& x, const sys_time& y) +{ + return !(x < y); +} + +template +inline +bool +operator>=(const sys_time& x, const leap& y) +{ + return !(x < y); +} + +#endif // !MISSING_LEAP_SECONDS + +#ifdef _WIN32 + +namespace detail +{ + +// The time zone mapping is modelled after this data file: +// http://unicode.org/repos/cldr/trunk/common/supplemental/windowsZones.xml +// and the field names match the element names from the mapZone element +// of windowsZones.xml. +// The website displays this file here: +// http://www.unicode.org/cldr/charts/latest/supplemental/zone_tzid.html +// The html view is sorted before being displayed but is otherwise the same +// There is a mapping between the os centric view (in this case windows) +// the html displays uses and the generic view the xml file. +// That mapping is this: +// display column "windows" -> xml field "other". +// display column "region" -> xml field "territory". +// display column "tzid" -> xml field "type". +// This structure uses the generic terminology because it could be +// used to to support other os/native name conversions, not just windows, +// and using the same generic names helps retain the connection to the +// origin of the data that we are using. 
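
To make the field mapping concrete, here is one mapZone row from the public windowsZones.xml and the values it yields through the constructor defined below; the row is quoted from CLDR, and the variable name is illustrative only.

// <mapZone other="Pacific Standard Time" territory="001" type="America/Los_Angeles"/>
// maps the Windows registry name to the IANA tzid for the world ("001") region:
detail::timezone_mapping m{"Pacific Standard Time", "001", "America/Los_Angeles"};
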
+struct timezone_mapping +{ + timezone_mapping(const char* other, const char* territory, const char* type) + : other(other), territory(territory), type(type) + { + } + timezone_mapping() = default; + std::string other; + std::string territory; + std::string type; +}; + +} // detail + +#endif // _WIN32 + +struct tzdb +{ + std::string version = "unknown"; + std::vector zones; +#if !USE_OS_TZDB + std::vector links; +#endif +#if !MISSING_LEAP_SECONDS + std::vector leaps; +#endif +#if !USE_OS_TZDB + std::vector rules; +#endif +#ifdef _WIN32 + std::vector mappings; +#endif + tzdb* next = nullptr; + + tzdb() = default; +#if !defined(_MSC_VER) || (_MSC_VER >= 1900) + tzdb(tzdb&&) = default; + tzdb& operator=(tzdb&&) = default; +#else // defined(_MSC_VER) && (_MSC_VER < 1900) + tzdb(tzdb&& src) + : version(std::move(src.version)) + , zones(std::move(src.zones)) + , links(std::move(src.links)) + , leaps(std::move(src.leaps)) + , rules(std::move(src.rules)) + , mappings(std::move(src.mappings)) + {} + + tzdb& operator=(tzdb&& src) + { + version = std::move(src.version); + zones = std::move(src.zones); + links = std::move(src.links); + leaps = std::move(src.leaps); + rules = std::move(src.rules); + mappings = std::move(src.mappings); + return *this; + } +#endif // defined(_MSC_VER) && (_MSC_VER < 1900) + +#if HAS_STRING_VIEW + const time_zone* locate_zone(std::string_view tz_name) const; +#else + const time_zone* locate_zone(const std::string& tz_name) const; +#endif + const time_zone* current_zone() const; +}; + +using TZ_DB = tzdb; + +DATE_API std::ostream& +operator<<(std::ostream& os, const tzdb& db); + +DATE_API const tzdb& get_tzdb(); + +class tzdb_list +{ + std::atomic head_{nullptr}; + +public: + ~tzdb_list(); + tzdb_list() = default; + tzdb_list(tzdb_list&& x) noexcept; + + const tzdb& front() const noexcept {return *head_;} + tzdb& front() noexcept {return *head_;} + + class const_iterator; + + const_iterator begin() const noexcept; + const_iterator end() const noexcept; + + const_iterator cbegin() const noexcept; + const_iterator cend() const noexcept; + + const_iterator erase_after(const_iterator p) noexcept; + + struct undocumented_helper; +private: + void push_front(tzdb* tzdb) noexcept; +}; + +class tzdb_list::const_iterator +{ + tzdb* p_ = nullptr; + + explicit const_iterator(tzdb* p) noexcept : p_{p} {} +public: + const_iterator() = default; + + using iterator_category = std::forward_iterator_tag; + using value_type = tzdb; + using reference = const value_type&; + using pointer = const value_type*; + using difference_type = std::ptrdiff_t; + + reference operator*() const noexcept {return *p_;} + pointer operator->() const noexcept {return p_;} + + const_iterator& operator++() noexcept {p_ = p_->next; return *this;} + const_iterator operator++(int) noexcept {auto t = *this; ++(*this); return t;} + + friend + bool + operator==(const const_iterator& x, const const_iterator& y) noexcept + {return x.p_ == y.p_;} + + friend + bool + operator!=(const const_iterator& x, const const_iterator& y) noexcept + {return !(x == y);} + + friend class tzdb_list; +}; + +inline +tzdb_list::const_iterator +tzdb_list::begin() const noexcept +{ + return const_iterator{head_}; +} + +inline +tzdb_list::const_iterator +tzdb_list::end() const noexcept +{ + return const_iterator{nullptr}; +} + +inline +tzdb_list::const_iterator +tzdb_list::cbegin() const noexcept +{ + return begin(); +} + +inline +tzdb_list::const_iterator +tzdb_list::cend() const noexcept +{ + return end(); +} + +DATE_API tzdb_list& 
get_tzdb_list(); + +#if !USE_OS_TZDB + +DATE_API const tzdb& reload_tzdb(); +DATE_API void set_install(const std::string& install); + +#endif // !USE_OS_TZDB + +#if HAS_REMOTE_API + +DATE_API std::string remote_version(); +DATE_API bool remote_download(const std::string& version); +DATE_API bool remote_install(const std::string& version); + +#endif + +// zoned_time + +namespace detail +{ + +template +inline +T* +to_raw_pointer(T* p) noexcept +{ + return p; +} + +template +inline +auto +to_raw_pointer(Pointer p) noexcept + -> decltype(detail::to_raw_pointer(p.operator->())) +{ + return detail::to_raw_pointer(p.operator->()); +} + +} // namespace detail + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1900) +template +#endif +inline +zoned_time::zoned_time() + : zone_(zoned_traits::default_zone()) + {} + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1900) +template +#endif +inline +zoned_time::zoned_time(const sys_time& st) + : zone_(zoned_traits::default_zone()) + , tp_(st) + {} + +template +inline +zoned_time::zoned_time(TimeZonePtr z) + : zone_(std::move(z)) + {assert(detail::to_raw_pointer(zone_) != nullptr);} + +#if HAS_STRING_VIEW + +template +template +inline +zoned_time::zoned_time(std::string_view name) + : zoned_time(zoned_traits::locate_zone(name)) + {} + +#else // !HAS_STRING_VIEW + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1900) +template +#endif +inline +zoned_time::zoned_time(const std::string& name) + : zoned_time(zoned_traits::locate_zone(name)) + {} + +#endif // !HAS_STRING_VIEW + +template +template +inline +zoned_time::zoned_time(const zoned_time& zt) NOEXCEPT + : zone_(zt.zone_) + , tp_(zt.tp_) + {} + +template +inline +zoned_time::zoned_time(TimeZonePtr z, const sys_time& st) + : zone_(std::move(z)) + , tp_(st) + {} + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1900) +template +#endif +inline +zoned_time::zoned_time(TimeZonePtr z, const local_time& t) + : zone_(std::move(z)) + , tp_(zone_->to_sys(t)) + {} + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1900) +template +#endif +inline +zoned_time::zoned_time(TimeZonePtr z, const local_time& t, + choose c) + : zone_(std::move(z)) + , tp_(zone_->to_sys(t, c)) + {} + +template +template +inline +zoned_time::zoned_time(TimeZonePtr z, + const zoned_time& zt) + : zone_(std::move(z)) + , tp_(zt.tp_) + {} + +template +template +inline +zoned_time::zoned_time(TimeZonePtr z, + const zoned_time& zt, choose) + : zoned_time(std::move(z), zt) + {} + +#if HAS_STRING_VIEW + +template +template +inline +zoned_time::zoned_time(std::string_view name, + const sys_time& st) + : zoned_time(zoned_traits::locate_zone(name), st) + {} + +template +template +inline +zoned_time::zoned_time(std::string_view name, + const local_time& t) + : zoned_time(zoned_traits::locate_zone(name), t) + {} + +template +template +inline +zoned_time::zoned_time(std::string_view name, + const local_time& t, choose c) + : zoned_time(zoned_traits::locate_zone(name), t, c) + {} + +template +template +inline +zoned_time::zoned_time(std::string_view name, const zoned_time& zt) + : zoned_time(zoned_traits::locate_zone(name), zt) + {} + +template +template +inline +zoned_time::zoned_time(std::string_view name, + const zoned_time& zt, choose c) + : zoned_time(zoned_traits::locate_zone(name), zt, c) + {} + +#else // !HAS_STRING_VIEW + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1900) +template +#endif +inline +zoned_time::zoned_time(const std::string& name, + const sys_time& st) + : zoned_time(zoned_traits::locate_zone(name), st) + {} + +template +#if 
!defined(_MSC_VER) || (_MSC_VER > 1900) +template +#endif +inline +zoned_time::zoned_time(const char* name, + const sys_time& st) + : zoned_time(zoned_traits::locate_zone(name), st) + {} + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1900) +template +#endif +inline +zoned_time::zoned_time(const std::string& name, + const local_time& t) + : zoned_time(zoned_traits::locate_zone(name), t) + {} + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1900) +template +#endif +inline +zoned_time::zoned_time(const char* name, + const local_time& t) + : zoned_time(zoned_traits::locate_zone(name), t) + {} + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1900) +template +#endif +inline +zoned_time::zoned_time(const std::string& name, + const local_time& t, choose c) + : zoned_time(zoned_traits::locate_zone(name), t, c) + {} + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1900) +template +#endif +inline +zoned_time::zoned_time(const char* name, + const local_time& t, choose c) + : zoned_time(zoned_traits::locate_zone(name), t, c) + {} + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1900) +template +#endif +inline +zoned_time::zoned_time(const std::string& name, + const zoned_time& zt) + : zoned_time(zoned_traits::locate_zone(name), zt) + {} + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1900) +template +#endif +inline +zoned_time::zoned_time(const char* name, const zoned_time& zt) + : zoned_time(zoned_traits::locate_zone(name), zt) + {} + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1900) +template +#endif +inline +zoned_time::zoned_time(const std::string& name, + const zoned_time& zt, choose c) + : zoned_time(zoned_traits::locate_zone(name), zt, c) + {} + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1900) +template +#endif +inline +zoned_time::zoned_time(const char* name, + const zoned_time& zt, choose c) + : zoned_time(zoned_traits::locate_zone(name), zt, c) + {} + +#endif // HAS_STRING_VIEW + +template +inline +zoned_time& +zoned_time::operator=(const sys_time& st) +{ + tp_ = st; + return *this; +} + +template +inline +zoned_time& +zoned_time::operator=(const local_time& ut) +{ + tp_ = zone_->to_sys(ut); + return *this; +} + +template +inline +zoned_time::operator local_time::duration>() const +{ + return get_local_time(); +} + +template +inline +zoned_time::operator sys_time::duration>() const +{ + return get_sys_time(); +} + +template +inline +TimeZonePtr +zoned_time::get_time_zone() const +{ + return zone_; +} + +template +inline +local_time::duration> +zoned_time::get_local_time() const +{ + return zone_->to_local(tp_); +} + +template +inline +sys_time::duration> +zoned_time::get_sys_time() const +{ + return tp_; +} + +template +inline +sys_info +zoned_time::get_info() const +{ + return zone_->get_info(tp_); +} + +// make_zoned_time + +inline +zoned_time +make_zoned() +{ + return zoned_time(); +} + +template +inline +zoned_time::type> +make_zoned(const sys_time& tp) +{ + return zoned_time::type>(tp); +} + +template 1900) + , class = typename std::enable_if + < + std::is_class + < + typename std::decay + < + decltype(*detail::to_raw_pointer(std::declval())) + >::type + >{} + >::type +#endif + > +inline +zoned_time +make_zoned(TimeZonePtr z) +{ + return zoned_time(std::move(z)); +} + +inline +zoned_seconds +make_zoned(const std::string& name) +{ + return zoned_seconds(name); +} + +template 1900) + , class = typename std::enable_if + < + std::is_class())>::type>{} + >::type +#endif + > +inline +zoned_time::type, TimeZonePtr> +make_zoned(TimeZonePtr zone, const local_time& tp) +{ 
+ return zoned_time::type, + TimeZonePtr>(std::move(zone), tp); +} + +template 1900) + , class = typename std::enable_if + < + std::is_class())>::type>{} + >::type +#endif + > +inline +zoned_time::type, TimeZonePtr> +make_zoned(TimeZonePtr zone, const local_time& tp, choose c) +{ + return zoned_time::type, + TimeZonePtr>(std::move(zone), tp, c); +} + +template +inline +zoned_time::type> +make_zoned(const std::string& name, const local_time& tp) +{ + return zoned_time::type>(name, tp); +} + +template +inline +zoned_time::type> +make_zoned(const std::string& name, const local_time& tp, choose c) +{ + return zoned_time::type>(name, tp, c); +} + +template +inline +zoned_time +make_zoned(TimeZonePtr zone, const zoned_time& zt) +{ + return zoned_time(std::move(zone), zt); +} + +template +inline +zoned_time +make_zoned(const std::string& name, const zoned_time& zt) +{ + return zoned_time(name, zt); +} + +template +inline +zoned_time +make_zoned(TimeZonePtr zone, const zoned_time& zt, choose c) +{ + return zoned_time(std::move(zone), zt, c); +} + +template +inline +zoned_time +make_zoned(const std::string& name, const zoned_time& zt, choose c) +{ + return zoned_time(name, zt, c); +} + +template 1900) + , class = typename std::enable_if + < + std::is_class())>::type>{} + >::type +#endif + > +inline +zoned_time::type, TimeZonePtr> +make_zoned(TimeZonePtr zone, const sys_time& st) +{ + return zoned_time::type, + TimeZonePtr>(std::move(zone), st); +} + +template +inline +zoned_time::type> +make_zoned(const std::string& name, const sys_time& st) +{ + return zoned_time::type>(name, st); +} + +template +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, + const zoned_time& tp) +{ + using duration = typename zoned_time::duration; + using LT = local_time; + auto const tz = tp.get_time_zone(); + auto const st = tp.get_sys_time(); + auto const info = tz->get_info(st); + return to_stream(os, fmt, LT{(st+info.offset).time_since_epoch()}, + &info.abbrev, &info.offset); +} + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const zoned_time& t) +{ + const CharT fmt[] = {'%', 'F', ' ', '%', 'T', ' ', '%', 'Z', CharT{}}; + return to_stream(os, fmt, t); +} + +#if !MISSING_LEAP_SECONDS + +class utc_clock +{ +public: + using duration = std::chrono::system_clock::duration; + using rep = duration::rep; + using period = duration::period; + using time_point = std::chrono::time_point; + static CONSTDATA bool is_steady = false; + + static time_point now(); + + template + static + std::chrono::time_point::type> + to_sys(const std::chrono::time_point&); + + template + static + std::chrono::time_point::type> + from_sys(const std::chrono::time_point&); +}; + +template + using utc_time = std::chrono::time_point; + +using utc_seconds = utc_time; + +template +utc_time::type> +utc_clock::from_sys(const sys_time& st) +{ + using namespace std::chrono; + using duration = typename std::common_type::type; + auto const& leaps = get_tzdb().leaps; + auto const lt = std::upper_bound(leaps.begin(), leaps.end(), st); + return utc_time{st.time_since_epoch() + seconds{lt-leaps.begin()}}; +} + +// Return pair +// first is true if ut is during a leap second insertion, otherwise false. 
+// If ut is during a leap second insertion, that leap second is included in the count +template +std::pair +is_leap_second(date::utc_time const& ut) +{ + using namespace date; + using namespace std::chrono; + using duration = typename std::common_type::type; + auto const& leaps = get_tzdb().leaps; + auto tp = sys_time{ut.time_since_epoch()}; + auto const lt = std::upper_bound(leaps.begin(), leaps.end(), tp); + auto ds = seconds{lt-leaps.begin()}; + tp -= ds; + auto ls = false; + if (lt > leaps.begin()) + { + if (tp < lt[-1]) + { + if (tp >= lt[-1].date() - seconds{1}) + ls = true; + else + --ds; + } + } + return {ls, ds}; +} + +template +sys_time::type> +utc_clock::to_sys(const utc_time& ut) +{ + using namespace std::chrono; + using duration = typename std::common_type::type; + auto ls = is_leap_second(ut); + auto tp = sys_time{ut.time_since_epoch() - ls.second}; + if (ls.first) + tp = floor(tp) + seconds{1} - duration{1}; + return tp; +} + +inline +utc_clock::time_point +utc_clock::now() +{ + using namespace std::chrono; + return from_sys(system_clock::now()); +} + +template +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, + const utc_time& t) +{ + using namespace std; + using namespace std::chrono; + using CT = typename common_type::type; + const string abbrev("UTC"); + CONSTDATA seconds offset{0}; + auto ls = is_leap_second(t); + auto tp = sys_time{t.time_since_epoch() - ls.second}; + auto const sd = floor(tp); + year_month_day ymd = sd; + auto time = make_time(tp - sys_seconds{sd}); + time.seconds() += seconds{ls.first}; + fields fds{ymd, time}; + return to_stream(os, fmt, fds, &abbrev, &offset); +} + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const utc_time& t) +{ + const CharT fmt[] = {'%', 'F', ' ', '%', 'T', CharT{}}; + return to_stream(os, fmt, t); +} + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, + utc_time& tp, std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) +{ + using namespace std; + using namespace std::chrono; + using CT = typename common_type::type; + minutes offset_local{}; + auto offptr = offset ? 
offset : &offset_local; + fields fds{}; + from_stream(is, fmt, fds, abbrev, offptr); + if (!fds.ymd.ok()) + is.setstate(ios::failbit); + if (!is.fail()) + { + bool is_60_sec = fds.tod.seconds() == seconds{60}; + if (is_60_sec) + fds.tod.seconds() -= seconds{1}; + auto tmp = utc_clock::from_sys(sys_days(fds.ymd) - *offptr + fds.tod.to_duration()); + if (is_60_sec) + tmp += seconds{1}; + if (is_60_sec != is_leap_second(tmp).first || !fds.tod.in_conventional_range()) + { + is.setstate(ios::failbit); + return is; + } + tp = time_point_cast(tmp); + } + return is; +} + +// tai_clock + +class tai_clock +{ +public: + using duration = std::chrono::system_clock::duration; + using rep = duration::rep; + using period = duration::period; + using time_point = std::chrono::time_point; + static const bool is_steady = false; + + static time_point now(); + + template + static + std::chrono::time_point::type> + to_utc(const std::chrono::time_point&) NOEXCEPT; + + template + static + std::chrono::time_point::type> + from_utc(const std::chrono::time_point&) NOEXCEPT; +}; + +template + using tai_time = std::chrono::time_point; + +using tai_seconds = tai_time; + +template +inline +utc_time::type> +tai_clock::to_utc(const tai_time& t) NOEXCEPT +{ + using namespace std::chrono; + using duration = typename std::common_type::type; + return utc_time{t.time_since_epoch()} - + (sys_days(year{1970}/jan/1) - sys_days(year{1958}/jan/1) + seconds{10}); +} + +template +inline +tai_time::type> +tai_clock::from_utc(const utc_time& t) NOEXCEPT +{ + using namespace std::chrono; + using duration = typename std::common_type::type; + return tai_time{t.time_since_epoch()} + + (sys_days(year{1970}/jan/1) - sys_days(year{1958}/jan/1) + seconds{10}); +} + +inline +tai_clock::time_point +tai_clock::now() +{ + using namespace std::chrono; + return from_utc(utc_clock::now()); +} + +template +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, + const tai_time& t) +{ + using namespace std; + using namespace std::chrono; + using CT = typename common_type::type; + const string abbrev("TAI"); + CONSTDATA seconds offset{0}; + auto tp = sys_time{t.time_since_epoch()} - + seconds(sys_days(year{1970}/jan/1) - sys_days(year{1958}/jan/1)); + auto const sd = floor(tp); + year_month_day ymd = sd; + auto time = make_time(tp - sys_seconds{sd}); + fields fds{ymd, time}; + return to_stream(os, fmt, fds, &abbrev, &offset); +} + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const tai_time& t) +{ + const CharT fmt[] = {'%', 'F', ' ', '%', 'T', CharT{}}; + return to_stream(os, fmt, t); +} + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, + tai_time& tp, + std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) +{ + using namespace std; + using namespace std::chrono; + using CT = typename common_type::type; + minutes offset_local{}; + auto offptr = offset ? 
offset : &offset_local; + fields fds{}; + from_stream(is, fmt, fds, abbrev, offptr); + if (!fds.ymd.ok() || !fds.tod.in_conventional_range()) + is.setstate(ios::failbit); + if (!is.fail()) + tp = tai_time{duration_cast( + (sys_days(fds.ymd) + + (sys_days(year{1970}/jan/1) - sys_days(year{1958}/jan/1)) - + *offptr + fds.tod.to_duration()).time_since_epoch())}; + return is; +} + +// gps_clock + +class gps_clock +{ +public: + using duration = std::chrono::system_clock::duration; + using rep = duration::rep; + using period = duration::period; + using time_point = std::chrono::time_point; + static const bool is_steady = false; + + static time_point now(); + + template + static + std::chrono::time_point::type> + to_utc(const std::chrono::time_point&) NOEXCEPT; + + template + static + std::chrono::time_point::type> + from_utc(const std::chrono::time_point&) NOEXCEPT; + +}; + +template + using gps_time = std::chrono::time_point; + +using gps_seconds = gps_time; + +template +inline +utc_time::type> +gps_clock::to_utc(const gps_time& t) NOEXCEPT +{ + using namespace std::chrono; + using duration = typename std::common_type::type; + return utc_time{t.time_since_epoch()} + + (sys_days(year{1980}/jan/sun[1]) - sys_days(year{1970}/jan/1) + seconds{9}); +} + +template +inline +gps_time::type> +gps_clock::from_utc(const utc_time& t) NOEXCEPT +{ + using namespace std::chrono; + using duration = typename std::common_type::type; + return gps_time{t.time_since_epoch()} - + (sys_days(year{1980}/jan/sun[1]) - sys_days(year{1970}/jan/1) + seconds{9}); +} + +inline +gps_clock::time_point +gps_clock::now() +{ + using namespace std::chrono; + return from_utc(utc_clock::now()); +} + +template +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, + const gps_time& t) +{ + using namespace std; + using namespace std::chrono; + using CT = typename common_type::type; + const string abbrev("GPS"); + CONSTDATA seconds offset{0}; + auto tp = sys_time{t.time_since_epoch()} + + seconds(sys_days(year{1980}/jan/sun[1]) - sys_days(year{1970}/jan/1)); + auto const sd = floor(tp); + year_month_day ymd = sd; + auto time = make_time(tp - sys_seconds{sd}); + fields fds{ymd, time}; + return to_stream(os, fmt, fds, &abbrev, &offset); +} + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const gps_time& t) +{ + const CharT fmt[] = {'%', 'F', ' ', '%', 'T', CharT{}}; + return to_stream(os, fmt, t); +} + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, + gps_time& tp, + std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) +{ + using namespace std; + using namespace std::chrono; + using CT = typename common_type::type; + minutes offset_local{}; + auto offptr = offset ? 
offset : &offset_local; + fields fds{}; + from_stream(is, fmt, fds, abbrev, offptr); + if (!fds.ymd.ok() || !fds.tod.in_conventional_range()) + is.setstate(ios::failbit); + if (!is.fail()) + tp = gps_time{duration_cast( + (sys_days(fds.ymd) - + (sys_days(year{1980}/jan/sun[1]) - sys_days(year{1970}/jan/1)) - + *offptr + fds.tod.to_duration()).time_since_epoch())}; + return is; +} + +// clock_time_conversion + +template +struct clock_time_conversion +{}; + +template <> +struct clock_time_conversion +{ + template + sys_time + operator()(const sys_time& st) const + { + return st; + } +}; + +template <> +struct clock_time_conversion +{ + template + utc_time + operator()(const utc_time& ut) const + { + return ut; + } +}; + +template <> +struct clock_time_conversion +{ + template + utc_time::type> + operator()(const sys_time& st) const + { + return utc_clock::from_sys(st); + } +}; + +template <> +struct clock_time_conversion +{ + template + sys_time::type> + operator()(const utc_time& ut) const + { + return utc_clock::to_sys(ut); + } +}; + +template +struct clock_time_conversion +{ + template + std::chrono::time_point + operator()(const std::chrono::time_point& tp) const + { + return tp; + } +}; + +namespace ctc_detail +{ + +template + using time_point = std::chrono::time_point; + +using std::declval; +using std::chrono::system_clock; + +//Check if TimePoint is time for given clock, +//if not emits hard error +template +struct return_clock_time +{ + using clock_time_point = time_point; + using type = TimePoint; + + static_assert(std::is_same::value, + "time point with appropariate clock shall be returned"); +}; + +// Check if Clock has to_sys method accepting TimePoint with given duration const& and +// returning sys_time. If so has nested type member equal to return type to_sys. 
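
The detection traits defined next (return_to_sys, return_from_sys, and their utc counterparts) exist so that any clock exposing these hooks participates in clock_cast. Below is a hedged sketch of such a clock; the name y2k_clock and its epoch are inventions for illustration.

#include <chrono>

#include "arrow/vendored/datetime/tz.h"

// Hypothetical clock counting time since 2000-01-01 00:00:00 UTC. Supplying
// from_sys/to_sys is all that clock_time_conversion needs to route casts.
struct y2k_clock
{
    using duration   = std::chrono::system_clock::duration;
    using rep        = duration::rep;
    using period     = duration::period;
    using time_point = std::chrono::time_point<y2k_clock>;

    static arrow::util::date::sys_days epoch()
    {
        using namespace arrow::util::date;
        return sys_days{year{2000}/jan/1};
    }

    template <class Duration>
    static std::chrono::time_point<y2k_clock, Duration>
    from_sys(const arrow::util::date::sys_time<Duration>& st)
    {
        return std::chrono::time_point<y2k_clock, Duration>{
            std::chrono::duration_cast<Duration>(st - epoch())};
    }

    template <class Duration>
    static arrow::util::date::sys_time<Duration>
    to_sys(const std::chrono::time_point<y2k_clock, Duration>& tp)
    {
        return arrow::util::date::sys_time<Duration>{
            std::chrono::duration_cast<Duration>(
                tp.time_since_epoch() + epoch().time_since_epoch())};
    }
};

// With the hooks in place, the clock_time_conversion<DstClock, system_clock>
// specialization below picks up from_sys, and a plain cast works:
//   auto t = arrow::util::date::clock_cast<y2k_clock>(std::chrono::system_clock::now());
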
+template +struct return_to_sys +{}; + +template +struct return_to_sys + < + Clock, Duration, + decltype(Clock::to_sys(declval const&>()), void()) + > + : return_clock_time + < + system_clock, + decltype(Clock::to_sys(declval const&>())) + > +{}; + +// Similiar to above +template +struct return_from_sys +{}; + +template +struct return_from_sys + < + Clock, Duration, + decltype(Clock::from_sys(declval const&>()), + void()) + > + : return_clock_time + < + Clock, + decltype(Clock::from_sys(declval const&>())) + > +{}; + +// Similiar to above +template +struct return_to_utc +{}; + +template +struct return_to_utc + < + Clock, Duration, + decltype(Clock::to_utc(declval const&>()), void()) + > + : return_clock_time + < + utc_clock, + decltype(Clock::to_utc(declval const&>()))> +{}; + +// Similiar to above +template +struct return_from_utc +{}; + +template +struct return_from_utc + < + Clock, Duration, + decltype(Clock::from_utc(declval const&>()), + void()) + > + : return_clock_time + < + Clock, + decltype(Clock::from_utc(declval const&>())) + > +{}; + +} // namespace ctc_detail + +template +struct clock_time_conversion +{ + template + typename ctc_detail::return_to_sys::type + operator()(const std::chrono::time_point& tp) const + { + return SrcClock::to_sys(tp); + } +}; + +template +struct clock_time_conversion +{ + template + typename ctc_detail::return_from_sys::type + operator()(const sys_time& st) const + { + return DstClock::from_sys(st); + } +}; + +template +struct clock_time_conversion +{ + template + typename ctc_detail::return_to_utc::type + operator()(const std::chrono::time_point& tp) const + { + return SrcClock::to_utc(tp); + } +}; + +template +struct clock_time_conversion +{ + template + typename ctc_detail::return_from_utc::type + operator()(const utc_time& ut) const + { + return DstClock::from_utc(ut); + } +}; + +namespace clock_cast_detail +{ + +template + using time_point = std::chrono::time_point; +using std::chrono::system_clock; + +template +auto +conv_clock(const time_point& t) + -> decltype(std::declval>()(t)) +{ + return clock_time_conversion{}(t); +} + +//direct trait conversion, 1st candidate +template +auto +cc_impl(const time_point& t, const time_point*) + -> decltype(conv_clock(t)) +{ + return conv_clock(t); +} + +//conversion through sys, 2nd candidate +template +auto +cc_impl(const time_point& t, const void*) + -> decltype(conv_clock(conv_clock(t))) +{ + return conv_clock(conv_clock(t)); +} + +//conversion through utc, 2nd candidate +template +auto +cc_impl(const time_point& t, const void*) + -> decltype(0, // MSVC_WORKAROUND + conv_clock(conv_clock(t))) +{ + return conv_clock(conv_clock(t)); +} + +//conversion through sys and utc, 3rd candidate +template +auto +cc_impl(const time_point& t, ...) + -> decltype(conv_clock(conv_clock(conv_clock(t)))) +{ + return conv_clock(conv_clock(conv_clock(t))); +} + +//conversion through utc and sys, 3rd candidate +template +auto +cc_impl(const time_point& t, ...) 
+ -> decltype(0, // MSVC_WORKAROUND + conv_clock(conv_clock(conv_clock(t)))) +{ + return conv_clock(conv_clock(conv_clock(t))); +} + +} // namespace clock_cast_detail + +template +auto +clock_cast(const std::chrono::time_point& tp) + -> decltype(clock_cast_detail::cc_impl(tp, &tp)) +{ + return clock_cast_detail::cc_impl(tp, &tp); +} + +// Deprecated API + +template +inline +sys_time::type> +to_sys_time(const utc_time& t) +{ + return utc_clock::to_sys(t); +} + +template +inline +sys_time::type> +to_sys_time(const tai_time& t) +{ + return utc_clock::to_sys(tai_clock::to_utc(t)); +} + +template +inline +sys_time::type> +to_sys_time(const gps_time& t) +{ + return utc_clock::to_sys(gps_clock::to_utc(t)); +} + + +template +inline +utc_time::type> +to_utc_time(const sys_time& t) +{ + return utc_clock::from_sys(t); +} + +template +inline +utc_time::type> +to_utc_time(const tai_time& t) +{ + return tai_clock::to_utc(t); +} + +template +inline +utc_time::type> +to_utc_time(const gps_time& t) +{ + return gps_clock::to_utc(t); +} + + +template +inline +tai_time::type> +to_tai_time(const sys_time& t) +{ + return tai_clock::from_utc(utc_clock::from_sys(t)); +} + +template +inline +tai_time::type> +to_tai_time(const utc_time& t) +{ + return tai_clock::from_utc(t); +} + +template +inline +tai_time::type> +to_tai_time(const gps_time& t) +{ + return tai_clock::from_utc(gps_clock::to_utc(t)); +} + + +template +inline +gps_time::type> +to_gps_time(const sys_time& t) +{ + return gps_clock::from_utc(utc_clock::from_sys(t)); +} + +template +inline +gps_time::type> +to_gps_time(const utc_time& t) +{ + return gps_clock::from_utc(t); +} + +template +inline +gps_time::type> +to_gps_time(const tai_time& t) +{ + return gps_clock::from_utc(tai_clock::to_utc(t)); +} + +#endif // !MISSING_LEAP_SECONDS + +} // namespace date +} // namespace util +} // namespace arrow + +#endif // TZ_H diff --git a/cpp/src/arrow/vendored/datetime/tz_private.h b/cpp/src/arrow/vendored/datetime/tz_private.h new file mode 100644 index 0000000000000..cafa5ea2bf1a8 --- /dev/null +++ b/cpp/src/arrow/vendored/datetime/tz_private.h @@ -0,0 +1,324 @@ +#ifndef TZ_PRIVATE_H +#define TZ_PRIVATE_H + +// The MIT License (MIT) +// +// Copyright (c) 2015, 2016 Howard Hinnant +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// +// Our apologies. When the previous paragraph was written, lowercase had not yet +// been invented (that would involve another several millennia of evolution). +// We did not mean to shout. 
+ +#if !defined(_MSC_VER) || (_MSC_VER >= 1900) +#include "tz.h" +#else +#include "date.h" +#include +#endif + +namespace arrow +{ +namespace util +{ +namespace date +{ + +namespace detail +{ + +#if !USE_OS_TZDB + +enum class tz {utc, local, standard}; + +//forward declare to avoid warnings in gcc 6.2 +class MonthDayTime; +std::istream& operator>>(std::istream& is, MonthDayTime& x); +std::ostream& operator<<(std::ostream& os, const MonthDayTime& x); + + +class MonthDayTime +{ +private: + struct pair + { +#if defined(_MSC_VER) && (_MSC_VER < 1900) + pair() : month_day_(date::jan / 1), weekday_(0U) {} + + pair(const date::month_day& month_day, const date::weekday& weekday) + : month_day_(month_day), weekday_(weekday) {} +#endif + + date::month_day month_day_; + date::weekday weekday_; + }; + + enum Type {month_day, month_last_dow, lteq, gteq}; + + Type type_{month_day}; + +#if !defined(_MSC_VER) || (_MSC_VER >= 1900) + union U +#else + struct U +#endif + { + date::month_day month_day_; + date::month_weekday_last month_weekday_last_; + pair month_day_weekday_; + +#if !defined(_MSC_VER) || (_MSC_VER >= 1900) + U() : month_day_{date::jan/1} {} +#else + U() : + month_day_(date::jan/1), + month_weekday_last_(date::month(0U), date::weekday_last(date::weekday(0U))) + {} + +#endif // !defined(_MSC_VER) || (_MSC_VER >= 1900) + + U& operator=(const date::month_day& x); + U& operator=(const date::month_weekday_last& x); + U& operator=(const pair& x); + } u; + + std::chrono::hours h_{0}; + std::chrono::minutes m_{0}; + std::chrono::seconds s_{0}; + tz zone_{tz::local}; + +public: + MonthDayTime() = default; + MonthDayTime(local_seconds tp, tz timezone); + MonthDayTime(const date::month_day& md, tz timezone); + + date::day day() const; + date::month month() const; + tz zone() const {return zone_;} + + void canonicalize(date::year y); + + sys_seconds + to_sys(date::year y, std::chrono::seconds offset, std::chrono::seconds save) const; + sys_days to_sys_days(date::year y) const; + + sys_seconds to_time_point(date::year y) const; + int compare(date::year y, const MonthDayTime& x, date::year yx, + std::chrono::seconds offset, std::chrono::minutes prev_save) const; + + friend std::istream& operator>>(std::istream& is, MonthDayTime& x); + friend std::ostream& operator<<(std::ostream& os, const MonthDayTime& x); +}; + +// A Rule specifies one or more set of datetimes without using an offset. +// Multiple dates are specified with multiple years. The years in effect +// go from starting_year_ to ending_year_, inclusive. starting_year_ <= +// ending_year_. save_ is in effect for times from the specified time +// onward, including the specified time. When the specified time is +// local, it uses the save_ from the chronologically previous Rule, or if +// there is none, 0. 
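
For concreteness, this is the shape of the tzdata the class models; the rule line is quoted from the IANA database's US rule set, and the accessor values are how the class below would report it.

// One line from the IANA tzdata "Rule" table (US daylight saving, 2007 onward):
//
//   # Rule  NAME  FROM  TO   TYPE  IN   ON      AT    SAVE  LETTER
//   Rule    US    2007  max  -     Mar  Sun>=8  2:00  1:00  D
//
// Reported through the accessors below: name() == "US", starting_year() == 2007,
// ending_year() == year::max(), save() == 60 minutes, abbrev() == "D", and mdt()
// describing 02:00 local ("wall clock") time on the second Sunday in March.
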
+ +//forward declare to avoid warnings in gcc 6.2 +class Rule; +bool operator==(const Rule& x, const Rule& y); +bool operator<(const Rule& x, const Rule& y); +bool operator==(const Rule& x, const date::year& y); +bool operator<(const Rule& x, const date::year& y); +bool operator==(const date::year& x, const Rule& y); +bool operator<(const date::year& x, const Rule& y); +bool operator==(const Rule& x, const std::string& y); +bool operator<(const Rule& x, const std::string& y); +bool operator==(const std::string& x, const Rule& y); +bool operator<(const std::string& x, const Rule& y); +std::ostream& operator<<(std::ostream& os, const Rule& r); + +class Rule +{ +private: + std::string name_; + date::year starting_year_{0}; + date::year ending_year_{0}; + MonthDayTime starting_at_; + std::chrono::minutes save_{0}; + std::string abbrev_; + +public: + Rule() = default; + explicit Rule(const std::string& s); + Rule(const Rule& r, date::year starting_year, date::year ending_year); + + const std::string& name() const {return name_;} + const std::string& abbrev() const {return abbrev_;} + + const MonthDayTime& mdt() const {return starting_at_;} + const date::year& starting_year() const {return starting_year_;} + const date::year& ending_year() const {return ending_year_;} + const std::chrono::minutes& save() const {return save_;} + + static void split_overlaps(std::vector& rules); + + friend bool operator==(const Rule& x, const Rule& y); + friend bool operator<(const Rule& x, const Rule& y); + friend bool operator==(const Rule& x, const date::year& y); + friend bool operator<(const Rule& x, const date::year& y); + friend bool operator==(const date::year& x, const Rule& y); + friend bool operator<(const date::year& x, const Rule& y); + friend bool operator==(const Rule& x, const std::string& y); + friend bool operator<(const Rule& x, const std::string& y); + friend bool operator==(const std::string& x, const Rule& y); + friend bool operator<(const std::string& x, const Rule& y); + + friend std::ostream& operator<<(std::ostream& os, const Rule& r); + +private: + date::day day() const; + date::month month() const; + static void split_overlaps(std::vector& rules, std::size_t i, std::size_t& e); + static bool overlaps(const Rule& x, const Rule& y); + static void split(std::vector& rules, std::size_t i, std::size_t k, + std::size_t& e); +}; + +inline bool operator!=(const Rule& x, const Rule& y) {return !(x == y);} +inline bool operator> (const Rule& x, const Rule& y) {return y < x;} +inline bool operator<=(const Rule& x, const Rule& y) {return !(y < x);} +inline bool operator>=(const Rule& x, const Rule& y) {return !(x < y);} + +inline bool operator!=(const Rule& x, const date::year& y) {return !(x == y);} +inline bool operator> (const Rule& x, const date::year& y) {return y < x;} +inline bool operator<=(const Rule& x, const date::year& y) {return !(y < x);} +inline bool operator>=(const Rule& x, const date::year& y) {return !(x < y);} + +inline bool operator!=(const date::year& x, const Rule& y) {return !(x == y);} +inline bool operator> (const date::year& x, const Rule& y) {return y < x;} +inline bool operator<=(const date::year& x, const Rule& y) {return !(y < x);} +inline bool operator>=(const date::year& x, const Rule& y) {return !(x < y);} + +inline bool operator!=(const Rule& x, const std::string& y) {return !(x == y);} +inline bool operator> (const Rule& x, const std::string& y) {return y < x;} +inline bool operator<=(const Rule& x, const std::string& y) {return !(y < x);} +inline bool 
operator>=(const Rule& x, const std::string& y) {return !(x < y);} + +inline bool operator!=(const std::string& x, const Rule& y) {return !(x == y);} +inline bool operator> (const std::string& x, const Rule& y) {return y < x;} +inline bool operator<=(const std::string& x, const Rule& y) {return !(y < x);} +inline bool operator>=(const std::string& x, const Rule& y) {return !(x < y);} + +struct zonelet +{ + enum tag {has_rule, has_save, is_empty}; + + std::chrono::seconds gmtoff_; + tag tag_ = has_rule; + +#if !defined(_MSC_VER) || (_MSC_VER >= 1900) + union U +#else + struct U +#endif + { + std::string rule_; + std::chrono::minutes save_; + + ~U() {} + U() {} + U(const U&) {} + U& operator=(const U&) = delete; + } u; + + std::string format_; + date::year until_year_{0}; + MonthDayTime until_date_; + sys_seconds until_utc_; + local_seconds until_std_; + local_seconds until_loc_; + std::chrono::minutes initial_save_{}; + std::string initial_abbrev_; + std::pair first_rule_{nullptr, date::year::min()}; + std::pair last_rule_{nullptr, date::year::max()}; + + ~zonelet(); + zonelet(); + zonelet(const zonelet& i); + zonelet& operator=(const zonelet&) = delete; +}; + +#else // USE_OS_TZDB + +struct ttinfo +{ + std::int32_t tt_gmtoff; + unsigned char tt_isdst; + unsigned char tt_abbrind; + unsigned char pad[2]; +}; + +static_assert(sizeof(ttinfo) == 8, ""); + +struct expanded_ttinfo +{ + std::chrono::seconds offset; + std::string abbrev; + bool is_dst; +}; + +struct transition +{ + sys_seconds timepoint; + const expanded_ttinfo* info; + + transition(sys_seconds tp, const expanded_ttinfo* i = nullptr) + : timepoint(tp) + , info(i) + {} + + friend + std::ostream& + operator<<(std::ostream& os, const transition& t) + { + using namespace date; + using namespace std::chrono; + using date::operator<<; + os << t.timepoint << "Z "; + if (t.info->offset >= seconds{0}) + os << '+'; + os << make_time(t.info->offset); + if (t.info->is_dst > 0) + os << " daylight "; + else + os << " standard "; + os << t.info->abbrev; + return os; + } +}; + +#endif // USE_OS_TZDB + +} // namespace detail + +} // namespace date +} // namespace util +} // namespace arrow + +#if defined(_MSC_VER) && (_MSC_VER < 1900) +#include "tz.h" +#endif + +#endif // TZ_PRIVATE_H diff --git a/cpp/src/arrow/vendored/datetime/visibility.h b/cpp/src/arrow/vendored/datetime/visibility.h new file mode 100644 index 0000000000000..ae031238d85ac --- /dev/null +++ b/cpp/src/arrow/vendored/datetime/visibility.h @@ -0,0 +1,26 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#if defined(ARROW_STATIC) +// intentionally empty +#elif defined(ARROW_EXPORTING) +#define DATE_BUILD_DLL +#else +#define DATE_USE_DLL +#endif diff --git a/cpp/src/arrow/util/string_view/string_view.hpp b/cpp/src/arrow/vendored/string_view.hpp similarity index 100% rename from cpp/src/arrow/util/string_view/string_view.hpp rename to cpp/src/arrow/vendored/string_view.hpp diff --git a/cpp/src/arrow/vendored/variant/CMakeLists.txt b/cpp/src/arrow/vendored/variant/CMakeLists.txt new file mode 100644 index 0000000000000..de26f938d72f3 --- /dev/null +++ b/cpp/src/arrow/vendored/variant/CMakeLists.txt @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARROW_INSTALL_ALL_HEADERS("arrow/vendored/variant") diff --git a/cpp/src/arrow/util/variant/recursive_wrapper.h b/cpp/src/arrow/vendored/variant/recursive_wrapper.hpp similarity index 89% rename from cpp/src/arrow/util/variant/recursive_wrapper.h rename to cpp/src/arrow/vendored/variant/recursive_wrapper.hpp index c9d9385394b38..96b6a3f217f5b 100644 --- a/cpp/src/arrow/util/variant/recursive_wrapper.h +++ b/cpp/src/arrow/vendored/variant/recursive_wrapper.hpp @@ -1,7 +1,9 @@ -#ifndef ARROW_UTIL_VARIANT_RECURSIVE_WRAPPER_H -#define ARROW_UTIL_VARIANT_RECURSIVE_WRAPPER_H +// Vendored from https://github.com/mapbox/variant at tag v1.1.5 -// Based on variant/recursive_wrapper.h from boost. +#ifndef MAPBOX_UTIL_RECURSIVE_WRAPPER_HPP +#define MAPBOX_UTIL_RECURSIVE_WRAPPER_HPP + +// Based on variant/recursive_wrapper.hpp from boost.
// // Original license: // @@ -15,7 +17,7 @@ #include #include -namespace arrow { +namespace mapbox { namespace util { template @@ -117,6 +119,6 @@ inline void swap(recursive_wrapper& lhs, recursive_wrapper& rhs) noexcept lhs.swap(rhs); } } // namespace util -} // namespace arrow +} // namespace mapbox -#endif // ARROW_UTIL_VARIANT_RECURSIVE_WRAPPER_H +#endif // MAPBOX_UTIL_RECURSIVE_WRAPPER_HPP diff --git a/cpp/src/arrow/vendored/variant/variant.hpp b/cpp/src/arrow/vendored/variant/variant.hpp new file mode 100644 index 0000000000000..bb399dece1d57 --- /dev/null +++ b/cpp/src/arrow/vendored/variant/variant.hpp @@ -0,0 +1,1029 @@ +// Vendored from https://github.com/mapbox/variant at tag v1.1.5 + +#ifndef MAPBOX_UTIL_VARIANT_HPP +#define MAPBOX_UTIL_VARIANT_HPP + +#include +#include // size_t +#include // operator new +#include // runtime_error +#include +#include +#include +#include +#include +#include + +#include "recursive_wrapper.hpp" +#include "variant_visitor.hpp" + +// clang-format off +// [[deprecated]] is only available in C++14, use this for the time being +#if __cplusplus <= 201103L +# ifdef __GNUC__ +# define MAPBOX_VARIANT_DEPRECATED __attribute__((deprecated)) +# elif defined(_MSC_VER) +# define MAPBOX_VARIANT_DEPRECATED __declspec(deprecated) +# else +# define MAPBOX_VARIANT_DEPRECATED +# endif +#else +# define MAPBOX_VARIANT_DEPRECATED [[deprecated]] +#endif + + +#ifdef _MSC_VER +// https://msdn.microsoft.com/en-us/library/bw1hbe6y.aspx +# ifdef NDEBUG +# define VARIANT_INLINE __forceinline +# else +# define VARIANT_INLINE //__declspec(noinline) +# endif +#else +# ifdef NDEBUG +# define VARIANT_INLINE //inline __attribute__((always_inline)) +# else +# define VARIANT_INLINE __attribute__((noinline)) +# endif +#endif +// clang-format on + +// Exceptions +#if defined( __EXCEPTIONS) || defined( _MSC_VER) +#define HAS_EXCEPTIONS +#endif + +#define VARIANT_MAJOR_VERSION 1 +#define VARIANT_MINOR_VERSION 1 +#define VARIANT_PATCH_VERSION 0 + +#define VARIANT_VERSION (VARIANT_MAJOR_VERSION * 100000) + (VARIANT_MINOR_VERSION * 100) + (VARIANT_PATCH_VERSION) + +namespace mapbox { +namespace util { + +// XXX This should derive from std::logic_error instead of std::runtime_error. +// See https://github.com/mapbox/variant/issues/48 for details. +class bad_variant_access : public std::runtime_error +{ + +public: + explicit bad_variant_access(const std::string& what_arg) + : runtime_error(what_arg) {} + + explicit bad_variant_access(const char* what_arg) + : runtime_error(what_arg) {} + +}; // class bad_variant_access + +template +struct MAPBOX_VARIANT_DEPRECATED static_visitor +{ + using result_type = R; + +protected: + static_visitor() {} + ~static_visitor() {} +}; + +namespace detail { + +static constexpr std::size_t invalid_value = std::size_t(-1); + +template +struct direct_type; + +template +struct direct_type +{ + static constexpr std::size_t index = std::is_same::value + ? 
sizeof...(Types) + : direct_type::index; +}; + +template +struct direct_type +{ + static constexpr std::size_t index = invalid_value; +}; + +#if __cpp_lib_logical_traits >= 201510L + +using std::conjunction; +using std::disjunction; + +#else + +template +struct conjunction : std::true_type {}; + +template +struct conjunction : B1 {}; + +template +struct conjunction : std::conditional::type {}; + +template +struct conjunction : std::conditional, B1>::type {}; + +template +struct disjunction : std::false_type {}; + +template +struct disjunction : B1 {}; + +template +struct disjunction : std::conditional::type {}; + +template +struct disjunction : std::conditional>::type {}; + +#endif + +template +struct convertible_type; + +template +struct convertible_type +{ + static constexpr std::size_t index = std::is_convertible::value + ? disjunction...>::value ? invalid_value : sizeof...(Types) + : convertible_type::index; +}; + +template +struct convertible_type +{ + static constexpr std::size_t index = invalid_value; +}; + +template +struct value_traits +{ + using value_type = typename std::remove_const::type>::type; + static constexpr std::size_t direct_index = direct_type::index; + static constexpr bool is_direct = direct_index != invalid_value; + static constexpr std::size_t index = is_direct ? direct_index : convertible_type::index; + static constexpr bool is_valid = index != invalid_value; + static constexpr std::size_t tindex = is_valid ? sizeof...(Types)-index : 0; + using target_type = typename std::tuple_element>::type; +}; + +template +struct enable_if_type +{ + using type = R; +}; + +template +struct result_of_unary_visit +{ + using type = typename std::result_of::type; +}; + +template +struct result_of_unary_visit::type> +{ + using type = typename F::result_type; +}; + +template +struct result_of_binary_visit +{ + using type = typename std::result_of::type; +}; + +template +struct result_of_binary_visit::type> +{ + using type = typename F::result_type; +}; + +template +struct static_max; + +template +struct static_max +{ + static const std::size_t value = arg; +}; + +template +struct static_max +{ + static const std::size_t value = arg1 >= arg2 ? 
static_max::value : static_max::value; +}; + +template +struct variant_helper; + +template +struct variant_helper +{ + VARIANT_INLINE static void destroy(const std::size_t type_index, void* data) + { + if (type_index == sizeof...(Types)) + { + reinterpret_cast(data)->~T(); + } + else + { + variant_helper::destroy(type_index, data); + } + } + + VARIANT_INLINE static void move(const std::size_t old_type_index, void* old_value, void* new_value) + { + if (old_type_index == sizeof...(Types)) + { + new (new_value) T(std::move(*reinterpret_cast(old_value))); + } + else + { + variant_helper::move(old_type_index, old_value, new_value); + } + } + + VARIANT_INLINE static void copy(const std::size_t old_type_index, const void* old_value, void* new_value) + { + if (old_type_index == sizeof...(Types)) + { + new (new_value) T(*reinterpret_cast(old_value)); + } + else + { + variant_helper::copy(old_type_index, old_value, new_value); + } + } +}; + +template <> +struct variant_helper<> +{ + VARIANT_INLINE static void destroy(const std::size_t, void*) {} + VARIANT_INLINE static void move(const std::size_t, void*, void*) {} + VARIANT_INLINE static void copy(const std::size_t, const void*, void*) {} +}; + +template +struct unwrapper +{ + static T const& apply_const(T const& obj) { return obj; } + static T& apply(T& obj) { return obj; } +}; + +template +struct unwrapper> +{ + static auto apply_const(recursive_wrapper const& obj) + -> typename recursive_wrapper::type const& + { + return obj.get(); + } + static auto apply(recursive_wrapper& obj) + -> typename recursive_wrapper::type& + { + return obj.get(); + } +}; + +template +struct unwrapper> +{ + static auto apply_const(std::reference_wrapper const& obj) + -> typename std::reference_wrapper::type const& + { + return obj.get(); + } + static auto apply(std::reference_wrapper& obj) + -> typename std::reference_wrapper::type& + { + return obj.get(); + } +}; + +template +struct dispatcher; + +template +struct dispatcher +{ + VARIANT_INLINE static R apply_const(V const& v, F&& f) + { + if (v.template is()) + { + return f(unwrapper::apply_const(v.template get_unchecked())); + } + else + { + return dispatcher::apply_const(v, std::forward(f)); + } + } + + VARIANT_INLINE static R apply(V& v, F&& f) + { + if (v.template is()) + { + return f(unwrapper::apply(v.template get_unchecked())); + } + else + { + return dispatcher::apply(v, std::forward(f)); + } + } +}; + +template +struct dispatcher +{ + VARIANT_INLINE static R apply_const(V const& v, F&& f) + { + return f(unwrapper::apply_const(v.template get_unchecked())); + } + + VARIANT_INLINE static R apply(V& v, F&& f) + { + return f(unwrapper::apply(v.template get_unchecked())); + } +}; + +template +struct binary_dispatcher_rhs; + +template +struct binary_dispatcher_rhs +{ + VARIANT_INLINE static R apply_const(V const& lhs, V const& rhs, F&& f) + { + if (rhs.template is()) // call binary functor + { + return f(unwrapper::apply_const(lhs.template get_unchecked()), + unwrapper::apply_const(rhs.template get_unchecked())); + } + else + { + return binary_dispatcher_rhs::apply_const(lhs, rhs, std::forward(f)); + } + } + + VARIANT_INLINE static R apply(V& lhs, V& rhs, F&& f) + { + if (rhs.template is()) // call binary functor + { + return f(unwrapper::apply(lhs.template get_unchecked()), + unwrapper::apply(rhs.template get_unchecked())); + } + else + { + return binary_dispatcher_rhs::apply(lhs, rhs, std::forward(f)); + } + } +}; + +template +struct binary_dispatcher_rhs +{ + VARIANT_INLINE static R apply_const(V const& lhs, V 
const& rhs, F&& f) + { + return f(unwrapper::apply_const(lhs.template get_unchecked()), + unwrapper::apply_const(rhs.template get_unchecked())); + } + + VARIANT_INLINE static R apply(V& lhs, V& rhs, F&& f) + { + return f(unwrapper::apply(lhs.template get_unchecked()), + unwrapper::apply(rhs.template get_unchecked())); + } +}; + +template +struct binary_dispatcher_lhs; + +template +struct binary_dispatcher_lhs +{ + VARIANT_INLINE static R apply_const(V const& lhs, V const& rhs, F&& f) + { + if (lhs.template is()) // call binary functor + { + return f(unwrapper::apply_const(lhs.template get_unchecked()), + unwrapper::apply_const(rhs.template get_unchecked())); + } + else + { + return binary_dispatcher_lhs::apply_const(lhs, rhs, std::forward(f)); + } + } + + VARIANT_INLINE static R apply(V& lhs, V& rhs, F&& f) + { + if (lhs.template is()) // call binary functor + { + return f(unwrapper::apply(lhs.template get_unchecked()), + unwrapper::apply(rhs.template get_unchecked())); + } + else + { + return binary_dispatcher_lhs::apply(lhs, rhs, std::forward(f)); + } + } +}; + +template +struct binary_dispatcher_lhs +{ + VARIANT_INLINE static R apply_const(V const& lhs, V const& rhs, F&& f) + { + return f(unwrapper::apply_const(lhs.template get_unchecked()), + unwrapper::apply_const(rhs.template get_unchecked())); + } + + VARIANT_INLINE static R apply(V& lhs, V& rhs, F&& f) + { + return f(unwrapper::apply(lhs.template get_unchecked()), + unwrapper::apply(rhs.template get_unchecked())); + } +}; + +template +struct binary_dispatcher; + +template +struct binary_dispatcher +{ + VARIANT_INLINE static R apply_const(V const& v0, V const& v1, F&& f) + { + if (v0.template is()) + { + if (v1.template is()) + { + return f(unwrapper::apply_const(v0.template get_unchecked()), + unwrapper::apply_const(v1.template get_unchecked())); // call binary functor + } + else + { + return binary_dispatcher_rhs::apply_const(v0, v1, std::forward(f)); + } + } + else if (v1.template is()) + { + return binary_dispatcher_lhs::apply_const(v0, v1, std::forward(f)); + } + return binary_dispatcher::apply_const(v0, v1, std::forward(f)); + } + + VARIANT_INLINE static R apply(V& v0, V& v1, F&& f) + { + if (v0.template is()) + { + if (v1.template is()) + { + return f(unwrapper::apply(v0.template get_unchecked()), + unwrapper::apply(v1.template get_unchecked())); // call binary functor + } + else + { + return binary_dispatcher_rhs::apply(v0, v1, std::forward(f)); + } + } + else if (v1.template is()) + { + return binary_dispatcher_lhs::apply(v0, v1, std::forward(f)); + } + return binary_dispatcher::apply(v0, v1, std::forward(f)); + } +}; + +template +struct binary_dispatcher +{ + VARIANT_INLINE static R apply_const(V const& v0, V const& v1, F&& f) + { + return f(unwrapper::apply_const(v0.template get_unchecked()), + unwrapper::apply_const(v1.template get_unchecked())); // call binary functor + } + + VARIANT_INLINE static R apply(V& v0, V& v1, F&& f) + { + return f(unwrapper::apply(v0.template get_unchecked()), + unwrapper::apply(v1.template get_unchecked())); // call binary functor + } +}; + +// comparator functors +struct equal_comp +{ + template + bool operator()(T const& lhs, T const& rhs) const + { + return lhs == rhs; + } +}; + +struct less_comp +{ + template + bool operator()(T const& lhs, T const& rhs) const + { + return lhs < rhs; + } +}; + +template +class comparer +{ +public: + explicit comparer(Variant const& lhs) noexcept + : lhs_(lhs) {} + comparer& operator=(comparer const&) = delete; + // visitor + template + bool operator()(T 
const& rhs_content) const + { + T const& lhs_content = lhs_.template get_unchecked(); + return Comp()(lhs_content, rhs_content); + } + +private: + Variant const& lhs_; +}; + +// hashing visitor +struct hasher +{ + template + std::size_t operator()(const T& hashable) const + { + return std::hash{}(hashable); + } +}; + +} // namespace detail + +struct no_init +{ +}; + +template +class variant +{ + static_assert(sizeof...(Types) > 0, "Template parameter type list of variant can not be empty"); + static_assert(!detail::disjunction...>::value, "Variant can not hold reference types. Maybe use std::reference_wrapper?"); + +private: + static const std::size_t data_size = detail::static_max::value; + static const std::size_t data_align = detail::static_max::value; +public: + struct adapted_variant_tag; + using types = std::tuple; +private: + using first_type = typename std::tuple_element<0, types>::type; + using data_type = typename std::aligned_storage::type; + using helper_type = detail::variant_helper; + + std::size_t type_index; + data_type data; + +public: + VARIANT_INLINE variant() noexcept(std::is_nothrow_default_constructible::value) + : type_index(sizeof...(Types)-1) + { + static_assert(std::is_default_constructible::value, "First type in variant must be default constructible to allow default construction of variant"); + new (&data) first_type(); + } + + VARIANT_INLINE variant(no_init) noexcept + : type_index(detail::invalid_value) {} + + // http://isocpp.org/blog/2012/11/universal-references-in-c11-scott-meyers + template , + typename Enable = typename std::enable_if, typename Traits::value_type>::value>::type > + VARIANT_INLINE variant(T&& val) noexcept(std::is_nothrow_constructible::value) + : type_index(Traits::index) + { + new (&data) typename Traits::target_type(std::forward(val)); + } + + VARIANT_INLINE variant(variant const& old) + : type_index(old.type_index) + { + helper_type::copy(old.type_index, &old.data, &data); + } + + VARIANT_INLINE variant(variant&& old) + noexcept(detail::conjunction...>::value) + : type_index(old.type_index) + { + helper_type::move(old.type_index, &old.data, &data); + } + +private: + VARIANT_INLINE void copy_assign(variant const& rhs) + { + helper_type::destroy(type_index, &data); + type_index = detail::invalid_value; + helper_type::copy(rhs.type_index, &rhs.data, &data); + type_index = rhs.type_index; + } + + VARIANT_INLINE void move_assign(variant&& rhs) + { + helper_type::destroy(type_index, &data); + type_index = detail::invalid_value; + helper_type::move(rhs.type_index, &rhs.data, &data); + type_index = rhs.type_index; + } + +public: + VARIANT_INLINE variant& operator=(variant&& other) + { + move_assign(std::move(other)); + return *this; + } + + VARIANT_INLINE variant& operator=(variant const& other) + { + copy_assign(other); + return *this; + } + + // conversions + // move-assign + template + VARIANT_INLINE variant& operator=(T&& rhs) noexcept + { + variant temp(std::forward(rhs)); + move_assign(std::move(temp)); + return *this; + } + + // copy-assign + template + VARIANT_INLINE variant& operator=(T const& rhs) + { + variant temp(rhs); + copy_assign(temp); + return *this; + } + + template ::index != detail::invalid_value)>::type* = nullptr> + VARIANT_INLINE bool is() const + { + return type_index == detail::direct_type::index; + } + + template , Types...>::index != detail::invalid_value)>::type* = nullptr> + VARIANT_INLINE bool is() const + { + return type_index == detail::direct_type, Types...>::index; + } + + VARIANT_INLINE bool valid() const + { 
+ return type_index != detail::invalid_value; + } + + template + VARIANT_INLINE void set(Args&&... args) + { + helper_type::destroy(type_index, &data); + type_index = detail::invalid_value; + new (&data) T(std::forward(args)...); + type_index = detail::direct_type::index; + } + + // get_unchecked() + template ::index != detail::invalid_value)>::type* = nullptr> + VARIANT_INLINE T& get_unchecked() + { + return *reinterpret_cast(&data); + } + +#ifdef HAS_EXCEPTIONS + // get() + template ::index != detail::invalid_value)>::type* = nullptr> + VARIANT_INLINE T& get() + { + if (type_index == detail::direct_type::index) + { + return *reinterpret_cast(&data); + } + else + { + throw bad_variant_access("in get()"); + } + } +#endif + + template ::index != detail::invalid_value)>::type* = nullptr> + VARIANT_INLINE T const& get_unchecked() const + { + return *reinterpret_cast(&data); + } + +#ifdef HAS_EXCEPTIONS + template ::index != detail::invalid_value)>::type* = nullptr> + VARIANT_INLINE T const& get() const + { + if (type_index == detail::direct_type::index) + { + return *reinterpret_cast(&data); + } + else + { + throw bad_variant_access("in get()"); + } + } +#endif + + // get_unchecked() - T stored as recursive_wrapper + template , Types...>::index != detail::invalid_value)>::type* = nullptr> + VARIANT_INLINE T& get_unchecked() + { + return (*reinterpret_cast*>(&data)).get(); + } + +#ifdef HAS_EXCEPTIONS + // get() - T stored as recursive_wrapper + template , Types...>::index != detail::invalid_value)>::type* = nullptr> + VARIANT_INLINE T& get() + { + if (type_index == detail::direct_type, Types...>::index) + { + return (*reinterpret_cast*>(&data)).get(); + } + else + { + throw bad_variant_access("in get()"); + } + } +#endif + + template , Types...>::index != detail::invalid_value)>::type* = nullptr> + VARIANT_INLINE T const& get_unchecked() const + { + return (*reinterpret_cast const*>(&data)).get(); + } + +#ifdef HAS_EXCEPTIONS + template , Types...>::index != detail::invalid_value)>::type* = nullptr> + VARIANT_INLINE T const& get() const + { + if (type_index == detail::direct_type, Types...>::index) + { + return (*reinterpret_cast const*>(&data)).get(); + } + else + { + throw bad_variant_access("in get()"); + } + } +#endif + + // get_unchecked() - T stored as std::reference_wrapper + template , Types...>::index != detail::invalid_value)>::type* = nullptr> + VARIANT_INLINE T& get_unchecked() + { + return (*reinterpret_cast*>(&data)).get(); + } + +#ifdef HAS_EXCEPTIONS + // get() - T stored as std::reference_wrapper + template , Types...>::index != detail::invalid_value)>::type* = nullptr> + VARIANT_INLINE T& get() + { + if (type_index == detail::direct_type, Types...>::index) + { + return (*reinterpret_cast*>(&data)).get(); + } + else + { + throw bad_variant_access("in get()"); + } + } +#endif + + template , Types...>::index != detail::invalid_value)>::type* = nullptr> + VARIANT_INLINE T const& get_unchecked() const + { + return (*reinterpret_cast const*>(&data)).get(); + } + +#ifdef HAS_EXCEPTIONS + template , Types...>::index != detail::invalid_value)>::type* = nullptr> + VARIANT_INLINE T const& get() const + { + if (type_index == detail::direct_type, Types...>::index) + { + return (*reinterpret_cast const*>(&data)).get(); + } + else + { + throw bad_variant_access("in get()"); + } + } +#endif + + // This function is deprecated because it returns an internal index field. + // Use which() instead. 
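+ // Illustrative note (not part of the vendored file): type_index counts down + // from sizeof...(Types) - 1, so for variant<int, bool> holding an int, + // type_index == 1 while which() == 2 - 1 - 1 == 0, the declaration-order index.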
+ MAPBOX_VARIANT_DEPRECATED VARIANT_INLINE std::size_t get_type_index() const + { + return type_index; + } + + VARIANT_INLINE int which() const noexcept + { + return static_cast(sizeof...(Types)-type_index - 1); + } + + template ::index != detail::invalid_value)>::type* = nullptr> + VARIANT_INLINE static constexpr int which() noexcept + { + return static_cast(sizeof...(Types)-detail::direct_type::index - 1); + } + + // visitor + // unary + template ::type> + auto VARIANT_INLINE static visit(V const& v, F&& f) + -> decltype(detail::dispatcher::apply_const(v, std::forward(f))) + { + return detail::dispatcher::apply_const(v, std::forward(f)); + } + // non-const + template ::type> + auto VARIANT_INLINE static visit(V& v, F&& f) + -> decltype(detail::dispatcher::apply(v, std::forward(f))) + { + return detail::dispatcher::apply(v, std::forward(f)); + } + + // binary + // const + template ::type> + auto VARIANT_INLINE static binary_visit(V const& v0, V const& v1, F&& f) + -> decltype(detail::binary_dispatcher::apply_const(v0, v1, std::forward(f))) + { + return detail::binary_dispatcher::apply_const(v0, v1, std::forward(f)); + } + // non-const + template ::type> + auto VARIANT_INLINE static binary_visit(V& v0, V& v1, F&& f) + -> decltype(detail::binary_dispatcher::apply(v0, v1, std::forward(f))) + { + return detail::binary_dispatcher::apply(v0, v1, std::forward(f)); + } + + // match + // unary + template + auto VARIANT_INLINE match(Fs&&... fs) const + -> decltype(variant::visit(*this, ::mapbox::util::make_visitor(std::forward(fs)...))) + { + return variant::visit(*this, ::mapbox::util::make_visitor(std::forward(fs)...)); + } + // non-const + template + auto VARIANT_INLINE match(Fs&&... fs) + -> decltype(variant::visit(*this, ::mapbox::util::make_visitor(std::forward(fs)...))) + { + return variant::visit(*this, ::mapbox::util::make_visitor(std::forward(fs)...)); + } + + ~variant() noexcept // no-throw destructor + { + helper_type::destroy(type_index, &data); + } + + // comparison operators + // equality + VARIANT_INLINE bool operator==(variant const& rhs) const + { + assert(valid() && rhs.valid()); + if (this->which() != rhs.which()) + { + return false; + } + detail::comparer visitor(*this); + return visit(rhs, visitor); + } + + VARIANT_INLINE bool operator!=(variant const& rhs) const + { + return !(*this == rhs); + } + + // less than + VARIANT_INLINE bool operator<(variant const& rhs) const + { + assert(valid() && rhs.valid()); + if (this->which() != rhs.which()) + { + return this->which() < rhs.which(); + } + detail::comparer visitor(*this); + return visit(rhs, visitor); + } + VARIANT_INLINE bool operator>(variant const& rhs) const + { + return rhs < *this; + } + VARIANT_INLINE bool operator<=(variant const& rhs) const + { + return !(*this > rhs); + } + VARIANT_INLINE bool operator>=(variant const& rhs) const + { + return !(*this < rhs); + } +}; + +// unary visitor interface +// const +template +auto VARIANT_INLINE apply_visitor(F&& f, V const& v) -> decltype(V::visit(v, std::forward(f))) +{ + return V::visit(v, std::forward(f)); +} + +// non-const +template +auto VARIANT_INLINE apply_visitor(F&& f, V& v) -> decltype(V::visit(v, std::forward(f))) +{ + return V::visit(v, std::forward(f)); +} + +// binary visitor interface +// const +template +auto VARIANT_INLINE apply_visitor(F&& f, V const& v0, V const& v1) -> decltype(V::binary_visit(v0, v1, std::forward(f))) +{ + return V::binary_visit(v0, v1, std::forward(f)); +} + +// non-const +template +auto VARIANT_INLINE apply_visitor(F&& f, V& v0, V& v1) 
-> decltype(V::binary_visit(v0, v1, std::forward(f))) +{ + return V::binary_visit(v0, v1, std::forward(f)); +} + +// getter interface + +#ifdef HAS_EXCEPTIONS +template +auto get(T& var)->decltype(var.template get()) +{ + return var.template get(); +} +#endif + +template +ResultType& get_unchecked(T& var) +{ + return var.template get_unchecked(); +} + +#ifdef HAS_EXCEPTIONS +template +auto get(T const& var)->decltype(var.template get()) +{ + return var.template get(); +} +#endif + +template +ResultType const& get_unchecked(T const& var) +{ + return var.template get_unchecked(); +} +} // namespace util +} // namespace mapbox + +// hashable iff underlying types are hashable +namespace std { +template +struct hash< ::mapbox::util::variant> { + std::size_t operator()(const ::mapbox::util::variant& v) const noexcept + { + return ::mapbox::util::apply_visitor(::mapbox::util::detail::hasher{}, v); + } +}; +} + +#endif // MAPBOX_UTIL_VARIANT_HPP diff --git a/cpp/src/arrow/vendored/variant/variant_io.hpp b/cpp/src/arrow/vendored/variant/variant_io.hpp new file mode 100644 index 0000000000000..494d2a964e319 --- /dev/null +++ b/cpp/src/arrow/vendored/variant/variant_io.hpp @@ -0,0 +1,47 @@ +// Vendored from https://github.com/mapbox/variant at tag v1.1.5 + +#ifndef MAPBOX_UTIL_VARIANT_IO_HPP +#define MAPBOX_UTIL_VARIANT_IO_HPP + +#include + +#include "variant.hpp" + +namespace mapbox { +namespace util { + +namespace detail { +// operator<< helper +template +class printer +{ +public: + explicit printer(Out& out) + : out_(out) {} + printer& operator=(printer const&) = delete; + + // visitor + template + void operator()(T const& operand) const + { + out_ << operand; + } + +private: + Out& out_; +}; +} + +// operator<< +template +VARIANT_INLINE std::basic_ostream& +operator<<(std::basic_ostream& out, variant const& rhs) +{ + detail::printer> visitor(out); + apply_visitor(visitor, rhs); + return out; +} +} // namespace util +} // namespace mapbox + +#endif // MAPBOX_UTIL_VARIANT_IO_HPP diff --git a/cpp/src/arrow/vendored/variant/variant_visitor.hpp b/cpp/src/arrow/vendored/variant/variant_visitor.hpp new file mode 100644 index 0000000000000..60020f4dd05dc --- /dev/null +++ b/cpp/src/arrow/vendored/variant/variant_visitor.hpp @@ -0,0 +1,40 @@ +// Vendored from https://github.com/mapbox/variant at tag v1.1.5 + +#ifndef MAPBOX_UTIL_VARIANT_VISITOR_HPP +#define MAPBOX_UTIL_VARIANT_VISITOR_HPP + +namespace mapbox { +namespace util { + +template +struct visitor; + +template +struct visitor : Fn +{ + using type = Fn; + using Fn::operator(); + + visitor(Fn fn) : Fn(fn) {} +}; + +template +struct visitor : Fn, visitor +{ + using type = visitor; + using Fn::operator(); + using visitor::operator(); + + visitor(Fn fn, Fns... fns) : Fn(fn), visitor(fns...) {} +}; + +template +visitor make_visitor(Fns... 
fns) +{ + return visitor(fns...); +} + +} // namespace util +} // namespace mapbox + +#endif // MAPBOX_UTIL_VARIANT_VISITOR_HPP diff --git a/cpp/src/arrow/util/xxhash/xxhash.c b/cpp/src/arrow/vendored/xxhash/xxhash.c similarity index 100% rename from cpp/src/arrow/util/xxhash/xxhash.c rename to cpp/src/arrow/vendored/xxhash/xxhash.c diff --git a/cpp/src/arrow/util/xxhash/xxhash.h b/cpp/src/arrow/vendored/xxhash/xxhash.h similarity index 100% rename from cpp/src/arrow/util/xxhash/xxhash.h rename to cpp/src/arrow/vendored/xxhash/xxhash.h diff --git a/cpp/src/arrow/visitor.cc b/cpp/src/arrow/visitor.cc index 47dba6cd8ddf2..0098e27590d10 100644 --- a/cpp/src/arrow/visitor.cc +++ b/cpp/src/arrow/visitor.cc @@ -30,33 +30,33 @@ namespace arrow { return Status::NotImplemented(array.type()->ToString()); \ } -ARRAY_VISITOR_DEFAULT(NullArray); -ARRAY_VISITOR_DEFAULT(BooleanArray); -ARRAY_VISITOR_DEFAULT(Int8Array); -ARRAY_VISITOR_DEFAULT(Int16Array); -ARRAY_VISITOR_DEFAULT(Int32Array); -ARRAY_VISITOR_DEFAULT(Int64Array); -ARRAY_VISITOR_DEFAULT(UInt8Array); -ARRAY_VISITOR_DEFAULT(UInt16Array); -ARRAY_VISITOR_DEFAULT(UInt32Array); -ARRAY_VISITOR_DEFAULT(UInt64Array); -ARRAY_VISITOR_DEFAULT(HalfFloatArray); -ARRAY_VISITOR_DEFAULT(FloatArray); -ARRAY_VISITOR_DEFAULT(DoubleArray); -ARRAY_VISITOR_DEFAULT(BinaryArray); -ARRAY_VISITOR_DEFAULT(StringArray); -ARRAY_VISITOR_DEFAULT(FixedSizeBinaryArray); -ARRAY_VISITOR_DEFAULT(Date32Array); -ARRAY_VISITOR_DEFAULT(Date64Array); -ARRAY_VISITOR_DEFAULT(Time32Array); -ARRAY_VISITOR_DEFAULT(Time64Array); -ARRAY_VISITOR_DEFAULT(TimestampArray); -ARRAY_VISITOR_DEFAULT(IntervalArray); -ARRAY_VISITOR_DEFAULT(ListArray); -ARRAY_VISITOR_DEFAULT(StructArray); -ARRAY_VISITOR_DEFAULT(UnionArray); -ARRAY_VISITOR_DEFAULT(DictionaryArray); -ARRAY_VISITOR_DEFAULT(Decimal128Array); +ARRAY_VISITOR_DEFAULT(NullArray) +ARRAY_VISITOR_DEFAULT(BooleanArray) +ARRAY_VISITOR_DEFAULT(Int8Array) +ARRAY_VISITOR_DEFAULT(Int16Array) +ARRAY_VISITOR_DEFAULT(Int32Array) +ARRAY_VISITOR_DEFAULT(Int64Array) +ARRAY_VISITOR_DEFAULT(UInt8Array) +ARRAY_VISITOR_DEFAULT(UInt16Array) +ARRAY_VISITOR_DEFAULT(UInt32Array) +ARRAY_VISITOR_DEFAULT(UInt64Array) +ARRAY_VISITOR_DEFAULT(HalfFloatArray) +ARRAY_VISITOR_DEFAULT(FloatArray) +ARRAY_VISITOR_DEFAULT(DoubleArray) +ARRAY_VISITOR_DEFAULT(BinaryArray) +ARRAY_VISITOR_DEFAULT(StringArray) +ARRAY_VISITOR_DEFAULT(FixedSizeBinaryArray) +ARRAY_VISITOR_DEFAULT(Date32Array) +ARRAY_VISITOR_DEFAULT(Date64Array) +ARRAY_VISITOR_DEFAULT(Time32Array) +ARRAY_VISITOR_DEFAULT(Time64Array) +ARRAY_VISITOR_DEFAULT(TimestampArray) +ARRAY_VISITOR_DEFAULT(IntervalArray) +ARRAY_VISITOR_DEFAULT(ListArray) +ARRAY_VISITOR_DEFAULT(StructArray) +ARRAY_VISITOR_DEFAULT(UnionArray) +ARRAY_VISITOR_DEFAULT(DictionaryArray) +ARRAY_VISITOR_DEFAULT(Decimal128Array) #undef ARRAY_VISITOR_DEFAULT @@ -68,33 +68,33 @@ ARRAY_VISITOR_DEFAULT(Decimal128Array); return Status::NotImplemented(type.ToString()); \ } -TYPE_VISITOR_DEFAULT(NullType); -TYPE_VISITOR_DEFAULT(BooleanType); -TYPE_VISITOR_DEFAULT(Int8Type); -TYPE_VISITOR_DEFAULT(Int16Type); -TYPE_VISITOR_DEFAULT(Int32Type); -TYPE_VISITOR_DEFAULT(Int64Type); -TYPE_VISITOR_DEFAULT(UInt8Type); -TYPE_VISITOR_DEFAULT(UInt16Type); -TYPE_VISITOR_DEFAULT(UInt32Type); -TYPE_VISITOR_DEFAULT(UInt64Type); -TYPE_VISITOR_DEFAULT(HalfFloatType); -TYPE_VISITOR_DEFAULT(FloatType); -TYPE_VISITOR_DEFAULT(DoubleType); -TYPE_VISITOR_DEFAULT(StringType); -TYPE_VISITOR_DEFAULT(BinaryType); -TYPE_VISITOR_DEFAULT(FixedSizeBinaryType); -TYPE_VISITOR_DEFAULT(Date64Type); 
-TYPE_VISITOR_DEFAULT(Date32Type); -TYPE_VISITOR_DEFAULT(Time32Type); -TYPE_VISITOR_DEFAULT(Time64Type); -TYPE_VISITOR_DEFAULT(TimestampType); -TYPE_VISITOR_DEFAULT(IntervalType); -TYPE_VISITOR_DEFAULT(Decimal128Type); -TYPE_VISITOR_DEFAULT(ListType); -TYPE_VISITOR_DEFAULT(StructType); -TYPE_VISITOR_DEFAULT(UnionType); -TYPE_VISITOR_DEFAULT(DictionaryType); +TYPE_VISITOR_DEFAULT(NullType) +TYPE_VISITOR_DEFAULT(BooleanType) +TYPE_VISITOR_DEFAULT(Int8Type) +TYPE_VISITOR_DEFAULT(Int16Type) +TYPE_VISITOR_DEFAULT(Int32Type) +TYPE_VISITOR_DEFAULT(Int64Type) +TYPE_VISITOR_DEFAULT(UInt8Type) +TYPE_VISITOR_DEFAULT(UInt16Type) +TYPE_VISITOR_DEFAULT(UInt32Type) +TYPE_VISITOR_DEFAULT(UInt64Type) +TYPE_VISITOR_DEFAULT(HalfFloatType) +TYPE_VISITOR_DEFAULT(FloatType) +TYPE_VISITOR_DEFAULT(DoubleType) +TYPE_VISITOR_DEFAULT(StringType) +TYPE_VISITOR_DEFAULT(BinaryType) +TYPE_VISITOR_DEFAULT(FixedSizeBinaryType) +TYPE_VISITOR_DEFAULT(Date64Type) +TYPE_VISITOR_DEFAULT(Date32Type) +TYPE_VISITOR_DEFAULT(Time32Type) +TYPE_VISITOR_DEFAULT(Time64Type) +TYPE_VISITOR_DEFAULT(TimestampType) +TYPE_VISITOR_DEFAULT(IntervalType) +TYPE_VISITOR_DEFAULT(Decimal128Type) +TYPE_VISITOR_DEFAULT(ListType) +TYPE_VISITOR_DEFAULT(StructType) +TYPE_VISITOR_DEFAULT(UnionType) +TYPE_VISITOR_DEFAULT(DictionaryType) #undef TYPE_VISITOR_DEFAULT diff --git a/cpp/src/arrow/visitor_inline.h b/cpp/src/arrow/visitor_inline.h index b6fc1f1ff2bfb..a5deaa7a1d22c 100644 --- a/cpp/src/arrow/visitor_inline.h +++ b/cpp/src/arrow/visitor_inline.h @@ -121,7 +121,7 @@ inline Status VisitArrayInline(const Array& array, VISITOR* visitor) { // The scalar value's type depends on the array data type: // - the type's `c_type`, if any // - for boolean arrays, a `bool` -// - for binary, string and fixed-size binary arrars, a `util::string_view` +// - for binary, string and fixed-size binary arrays, a `util::string_view` template struct ArrayDataVisitor {}; diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt index b71313e019aab..d5f4364129b18 100644 --- a/cpp/src/gandiva/CMakeLists.txt +++ b/cpp/src/gandiva/CMakeLists.txt @@ -15,30 +15,40 @@ # specific language governing permissions and limitations # under the License. -# LLVM/Clang is required by multiple subdirs. -cmake_minimum_required(VERSION 3.11) +set(GANDIVA_VERSION "${ARROW_VERSION}") -project(gandiva) +# For "make gandiva" to build everything Gandiva-related +add_custom_target(gandiva-all) +add_custom_target(gandiva) +add_custom_target(gandiva-tests) +add_custom_target(gandiva-benchmarks) -include(GandivaBuildUtils) +add_dependencies(gandiva-all gandiva gandiva-tests gandiva-benchmarks) find_package(LLVM) # Set the path where the byte-code files will be installed. 
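# (Illustrative example: for a default prefix this typically resolves to # /usr/local/lib/gandiva/irhelpers.bc, though CMAKE_INSTALL_LIBDIR may be # lib64 on some platforms.)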
set(GANDIVA_BC_INSTALL_DIR - ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}/gandiva) + ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/gandiva) set(GANDIVA_BC_FILE_NAME irhelpers.bc) -set(GANDIVA_BC_INSTALL_PATH ${GANDIVA_BC_INSTALL_DIR}/${GANDIVA_BC_FILE_NAME}) -set(GANDIVA_BC_OUTPUT_PATH ${BUILD_OUTPUT_ROOT_DIRECTORY}/${GANDIVA_BC_FILE_NAME}) +set(GANDIVA_BC_INSTALL_PATH "${GANDIVA_BC_INSTALL_DIR}/${GANDIVA_BC_FILE_NAME}") +set(GANDIVA_BC_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${GANDIVA_BC_FILE_NAME}") +install(FILES + ${GANDIVA_BC_OUTPUT_PATH} + DESTINATION ${GANDIVA_BC_INSTALL_DIR}) set(BC_FILE_PATH_CC "${CMAKE_CURRENT_BINARY_DIR}/bc_file_path.cc") configure_file(bc_file_path.cc.in ${BC_FILE_PATH_CC}) +add_definitions(-DGANDIVA_BYTE_COMPILE_FILE_PATH="${GANDIVA_BC_OUTPUT_PATH}") set(SRC_FILES annotator.cc bitmap_accumulator.cc + cast_time.cc configuration.cc context_helper.cc + decimal_ir.cc + decimal_type_util.cc engine.cc date_utils.cc expr_decomposer.cc @@ -47,7 +57,14 @@ set(SRC_FILES annotator.cc expression_registry.cc exported_funcs_registry.cc filter.cc + function_ir_builder.cc function_registry.cc + function_registry_arithmetic.cc + function_registry_datetime.cc + function_registry_hash.cc + function_registry_math_ops.cc + function_registry_string.cc + function_registry_timestamp_arithmetic.cc function_signature.cc gdv_function_stubs.cc llvm_generator.cc @@ -58,14 +75,10 @@ set(SRC_FILES annotator.cc selection_vector.cc tree_expr_builder.cc to_date_holder.cc - ${SHARED_HELPER_FILES} ${BC_FILE_PATH_CC}) set(GANDIVA_SHARED_PRIVATE_LINK_LIBS arrow_shared - ${BOOST_REGEX_LIBRARY} - ${BOOST_SYSTEM_LIBRARY} - ${BOOST_FILESYSTEM_LIBRARY} LLVM::LLVM_INTERFACE ${RE2_LIBRARY}) @@ -82,81 +95,115 @@ if (ARROW_GANDIVA_STATIC_LIBSTDCPP -static-libgcc) endif() +# if (MSVC) +# # Symbols that need to be made public in gandiva.dll for LLVM IR +# # compilation +# set(MSVC_SYMBOL_EXPORTS _Init_thread_header) +# foreach(SYMBOL ${MSVC_SYMBOL_EXPORTS}) +# set(GANDIVA_SHARED_LINK_FLAGS "${GANDIVA_SHARED_LINK_FLAGS} /EXPORT:${SYMBOL}") +# endforeach() +# endif() + ADD_ARROW_LIB(gandiva SOURCES ${SRC_FILES} OUTPUTS GANDIVA_LIBRARIES DEPENDENCIES arrow_dependencies precompiled EXTRA_INCLUDES $ + SHARED_LINK_FLAGS ${GANDIVA_SHARED_LINK_FLAGS} SHARED_LINK_LIBS arrow_shared SHARED_PRIVATE_LINK_LIBS ${GANDIVA_SHARED_PRIVATE_LINK_LIBS} STATIC_LINK_LIBS ${GANDIVA_STATIC_LINK_LIBS}) +foreach(LIB_TARGET ${GANDIVA_LIBRARIES}) + target_compile_definitions(${LIB_TARGET} + PRIVATE GANDIVA_EXPORTING) +endforeach() + +if (ARROW_BUILD_STATIC AND WIN32) + target_compile_definitions(gandiva_static PUBLIC GANDIVA_STATIC) +endif() + +add_dependencies(gandiva ${GANDIVA_LIBRARIES}) + # install for gandiva include(GNUInstallDirs) -# install libgandiva -install( - TARGETS gandiva_shared gandiva_static - DESTINATION ${CMAKE_INSTALL_LIBDIR} -) - # install the header files. 
-install(FILES - arrow.h - condition.h - configuration.h - expression.h - expression_registry.h - filter.h - function_signature.h - gandiva_aliases.h - logging.h - projector.h - selection_vector.h - tree_expr_builder.h - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/gandiva") +ARROW_INSTALL_ALL_HEADERS("gandiva") # pkg-config support -configure_file(gandiva.pc.in - "${CMAKE_CURRENT_BINARY_DIR}/gandiva.pc" - @ONLY) -install( - FILES "${CMAKE_CURRENT_BINARY_DIR}/gandiva.pc" - DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig/") - -if (ARROW_GANDIVA_BUILD_TESTS) - #args: label test-file src-files - add_gandiva_unit_test(bitmap_accumulator_test.cc bitmap_accumulator.cc) - add_gandiva_unit_test(engine_llvm_test.cc engine.cc llvm_types.cc configuration.cc - gdv_function_stubs.cc context_helper.cc to_date_holder.cc date_utils.cc - exported_funcs_registry.cc ${BC_FILE_PATH_CC}) - add_gandiva_unit_test(function_signature_test.cc function_signature.cc) - add_gandiva_unit_test(function_registry_test.cc function_registry.cc function_signature.cc) - add_gandiva_unit_test(llvm_types_test.cc llvm_types.cc) - add_gandiva_unit_test(llvm_generator_test.cc llvm_generator.cc regex_util.cc engine.cc - llvm_types.cc expr_decomposer.cc function_registry.cc annotator.cc - bitmap_accumulator.cc configuration.cc function_signature.cc like_holder.cc - to_date_holder.cc date_utils.cc regex_util.cc gdv_function_stubs.cc context_helper.cc - exported_funcs_registry.cc ${BC_FILE_PATH_CC}) - add_gandiva_unit_test(annotator_test.cc annotator.cc function_signature.cc) - add_gandiva_unit_test(tree_expr_test.cc tree_expr_builder.cc expr_decomposer.cc annotator.cc function_registry.cc function_signature.cc like_holder.cc regex_util.cc to_date_holder.cc date_utils.cc) - add_gandiva_unit_test(expr_decomposer_test.cc expr_decomposer.cc tree_expr_builder.cc annotator.cc function_registry.cc function_signature.cc like_holder.cc regex_util.cc to_date_holder.cc date_utils.cc) - add_gandiva_unit_test(expression_registry_test.cc llvm_types.cc expression_registry.cc function_signature.cc function_registry.cc) - add_gandiva_unit_test(selection_vector_test.cc selection_vector.cc) - add_gandiva_unit_test(lru_cache_test.cc) - add_gandiva_unit_test(to_date_holder_test.cc to_date_holder.cc date_utils.cc) - add_gandiva_unit_test(simple_arena_test.cc) -endif() +ARROW_ADD_PKG_CONFIG("gandiva") + +set(GANDIVA_STATIC_TEST_LINK_LIBS + gandiva_static + ${RE2_LIBRARY} + ${ARROW_TEST_LINK_LIBS}) + +set(GANDIVA_SHARED_TEST_LINK_LIBS + gandiva_shared + ${RE2_LIBRARY} + ${ARROW_TEST_LINK_LIBS}) + +function(ADD_GANDIVA_TEST REL_TEST_NAME) + set(options USE_STATIC_LINKING) + set(one_value_args) + set(multi_value_args) + cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN}) + + if (NO_TESTS) + return() + endif() + + set(TEST_ARGUMENTS + ENABLED + PREFIX "gandiva" + LABELS "gandiva-tests" + ${ARG_UNPARSED_ARGUMENTS}) + + # By default, tests link against the shared gandiva library; this is faster + # and uses less disk space, but in some cases we need to force static + # linking (see rationale below). + if (ARG_USE_STATIC_LINKING) + ADD_TEST_CASE(${REL_TEST_NAME} + ${TEST_ARGUMENTS} + STATIC_LINK_LIBS ${GANDIVA_STATIC_TEST_LINK_LIBS}) + else() + ADD_TEST_CASE(${REL_TEST_NAME} + ${TEST_ARGUMENTS} + STATIC_LINK_LIBS ${GANDIVA_SHARED_TEST_LINK_LIBS}) + endif() + + set(TARGET_NAME gandiva-${REL_TEST_NAME}) + + if((TARGET ${TARGET_NAME}) AND + (${REL_TEST_NAME} MATCHES "llvm" OR + ${REL_TEST_NAME} MATCHES "expression_registry")) + # If the unit test has llvm in its name, or exercises the expression + # registry, link LLVM in directly.
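+ # (Illustrative list based on the ADD_GANDIVA_TEST calls below: this matches + # gandiva-engine_llvm_test, gandiva-llvm_types_test, gandiva-llvm_generator_test + # and gandiva-expression_registry_test.)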
+ add_dependencies(${TARGET_NAME} LLVM::LLVM_INTERFACE) + target_link_libraries(${TARGET_NAME} PRIVATE LLVM::LLVM_INTERFACE) + endif() +endfunction() + +ADD_GANDIVA_TEST(bitmap_accumulator_test) +ADD_GANDIVA_TEST(engine_llvm_test) +ADD_GANDIVA_TEST(function_signature_test) +ADD_GANDIVA_TEST(function_registry_test) +ADD_GANDIVA_TEST(llvm_types_test) +ADD_GANDIVA_TEST(llvm_generator_test) +ADD_GANDIVA_TEST(annotator_test) +ADD_GANDIVA_TEST(tree_expr_test) +ADD_GANDIVA_TEST(expr_decomposer_test) +ADD_GANDIVA_TEST(expression_registry_test) +ADD_GANDIVA_TEST(selection_vector_test) +ADD_GANDIVA_TEST(lru_cache_test) +ADD_GANDIVA_TEST(to_date_holder_test) +ADD_GANDIVA_TEST(simple_arena_test) +ADD_GANDIVA_TEST(like_holder_test) +ADD_GANDIVA_TEST(decimal_type_util_test) if (ARROW_GANDIVA_JAVA) add_subdirectory(jni) endif() -add_subdirectory(precompiled) -if (ARROW_GANDIVA_BUILD_TESTS) - include(CTest) - enable_testing() - - add_subdirectory(tests) -endif() +add_subdirectory(precompiled) +add_subdirectory(tests) diff --git a/cpp/src/gandiva/annotator.h b/cpp/src/gandiva/annotator.h index 6c2cd05b04efd..c0ddc02463590 100644 --- a/cpp/src/gandiva/annotator.h +++ b/cpp/src/gandiva/annotator.h @@ -27,12 +27,13 @@ #include "gandiva/eval_batch.h" #include "gandiva/gandiva_aliases.h" #include "gandiva/logging.h" +#include "gandiva/visibility.h" namespace gandiva { /// \brief annotate the arrow fields in an expression, and use that /// to convert the incoming arrow-format row batch to an EvalBatch. -class Annotator { +class GANDIVA_EXPORT Annotator { public: Annotator() : buffer_count_(0), local_bitmap_count_(0) {} diff --git a/cpp/src/gandiva/arrow.h b/cpp/src/gandiva/arrow.h index ea283523a56dc..cc2bd9a10294b 100644 --- a/cpp/src/gandiva/arrow.h +++ b/cpp/src/gandiva/arrow.h @@ -35,6 +35,9 @@ using ArrayPtr = std::shared_ptr; using DataTypePtr = std::shared_ptr; using DataTypeVector = std::vector; +using Decimal128TypePtr = std::shared_ptr; +using Decimal128TypeVector = std::vector; + using FieldPtr = std::shared_ptr; using FieldVector = std::vector; @@ -48,6 +51,14 @@ using ArrayDataVector = std::vector; using Status = arrow::Status; using StatusCode = arrow::StatusCode; +static inline bool is_decimal_128(DataTypePtr type) { + if (type->id() == arrow::Type::DECIMAL) { + auto decimal_type = arrow::internal::checked_cast(type.get()); + return decimal_type->byte_width() == 16; + } else { + return false; + } +} } // namespace gandiva #endif // GANDIVA_EXPR_ARROW_H diff --git a/cpp/src/gandiva/basic_decimal_scalar.h b/cpp/src/gandiva/basic_decimal_scalar.h new file mode 100644 index 0000000000000..fab82277cc978 --- /dev/null +++ b/cpp/src/gandiva/basic_decimal_scalar.h @@ -0,0 +1,58 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
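Stepping back to the gandiva/arrow.h hunk above, a small usage sketch of the new is_decimal_128() helper (editor's illustration, not part of the patch; arrow::decimal() is the standard factory for a 128-bit Decimal128Type):

```cpp
#include "arrow/type.h"
#include "gandiva/arrow.h"

void decimal_check_demo() {
  // arrow::decimal(precision, scale) yields a Decimal128Type (byte_width 16).
  gandiva::DataTypePtr dec = arrow::decimal(/*precision=*/38, /*scale=*/2);
  bool a = gandiva::is_decimal_128(dec);             // true
  bool b = gandiva::is_decimal_128(arrow::int32());  // false: not a decimal at all
}
```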
+ +#pragma once + +#include <cstdint> +#include "arrow/util/basic_decimal.h" + +namespace gandiva { + +using arrow::BasicDecimal128; + +/// Represents a 128-bit decimal value along with its precision and scale. +class BasicDecimalScalar128 { + public: + BasicDecimalScalar128(int64_t high_bits, uint64_t low_bits, int32_t precision, + int32_t scale) + : value_(high_bits, low_bits), precision_(precision), scale_(scale) {} + + BasicDecimalScalar128(const BasicDecimal128& value, int32_t precision, int32_t scale) + : value_(value), precision_(precision), scale_(scale) {} + + BasicDecimalScalar128(int32_t precision, int32_t scale) + : precision_(precision), scale_(scale) {} + + int32_t scale() const { return scale_; } + + int32_t precision() const { return precision_; } + + const BasicDecimal128& value() const { return value_; } + + private: + BasicDecimal128 value_; + int32_t precision_; + int32_t scale_; +}; + +inline bool operator==(const BasicDecimalScalar128& left, + const BasicDecimalScalar128& right) { + return left.value() == right.value() && left.precision() == right.precision() && + left.scale() == right.scale(); +} + +} // namespace gandiva diff --git a/cpp/src/gandiva/bc_file_path.cc.in b/cpp/src/gandiva/bc_file_path.cc.in index d6b4e342b6714..54e81ca2bfa18 100644 --- a/cpp/src/gandiva/bc_file_path.cc.in +++ b/cpp/src/gandiva/bc_file_path.cc.in @@ -18,6 +18,6 @@ namespace gandiva { // Path to the byte-code file. -extern const char kByteCodeFilePath[] = "${GANDIVA_BC_OUTPUT_PATH}"; +extern const char kByteCodeFilePath[] = "${GANDIVA_BC_INSTALL_PATH}"; } // namespace gandiva diff --git a/cpp/src/gandiva/bitmap_accumulator.h b/cpp/src/gandiva/bitmap_accumulator.h index 157405d680e5b..15a2044b5fd4d 100644 --- a/cpp/src/gandiva/bitmap_accumulator.h +++ b/cpp/src/gandiva/bitmap_accumulator.h @@ -24,12 +24,13 @@ #include "gandiva/dex.h" #include "gandiva/dex_visitor.h" #include "gandiva/eval_batch.h" +#include "gandiva/visibility.h" namespace gandiva { /// \brief Extract bitmap buffer from either the input/buffer vectors or the /// local validity bitmap, and accumulates them to do the final computation.
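/// (Illustrative note: the accumulated result is the bitwise AND, i.e. the /// intersection, of the collected validity bitmaps; see ByteWiseIntersectBitMaps /// in bitmap_accumulator_test.cc below.)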
-class BitMapAccumulator : public DexDefaultVisitor { +class GANDIVA_EXPORT BitMapAccumulator : public DexDefaultVisitor { public: explicit BitMapAccumulator(const EvalBatch& eval_batch) : eval_batch_(eval_batch), all_invalid_(false) {} diff --git a/cpp/src/gandiva/bitmap_accumulator_test.cc b/cpp/src/gandiva/bitmap_accumulator_test.cc index fc89421344e83..51a8b09ec724d 100644 --- a/cpp/src/gandiva/bitmap_accumulator_test.cc +++ b/cpp/src/gandiva/bitmap_accumulator_test.cc @@ -21,25 +21,22 @@ #include #include + +#include "arrow/test-util.h" + +#include "gandiva/dex.h" namespace gandiva { class TestBitMapAccumulator : public ::testing::Test { protected: - void FillBitMap(uint8_t* bmap, int nrecords); + void FillBitMap(uint8_t* bmap, uint32_t seed, int nbytes); void ByteWiseIntersectBitMaps(uint8_t* dst, const std::vector& srcs, int nrecords); }; -void TestBitMapAccumulator::FillBitMap(uint8_t* bmap, int nrecords) { - int nbytes = nrecords / 8; - unsigned int cur; - - for (int i = 0; i < nbytes; ++i) { - rand_r(&cur); - bmap[i] = static_cast(cur % UINT8_MAX); - } +void TestBitMapAccumulator::FillBitMap(uint8_t* bmap, uint32_t seed, int nbytes) { + ::arrow::random_bytes(nbytes, seed, bmap); } void TestBitMapAccumulator::ByteWiseIntersectBitMaps(uint8_t* dst, @@ -62,7 +59,7 @@ TEST_F(TestBitMapAccumulator, TestIntersectBitMaps) { uint8_t expected_bitmap[length]; for (int i = 0; i < 4; i++) { - FillBitMap(src_bitmaps[i], nrecords); + FillBitMap(src_bitmaps[i], i, length); } for (int i = 0; i < 4; i++) { diff --git a/cpp/src/gandiva/cast_time.cc b/cpp/src/gandiva/cast_time.cc new file mode 100644 index 0000000000000..1d4293b199661 --- /dev/null +++ b/cpp/src/gandiva/cast_time.cc @@ -0,0 +1,85 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include "arrow/vendored/datetime.h" + +#include "gandiva/precompiled/time_fields.h" + +#ifndef GANDIVA_UNIT_TEST +#include "gandiva/exported_funcs.h" +#include "gandiva/gdv_function_stubs.h" + +#include "gandiva/engine.h" + +namespace gandiva { + +void ExportedTimeFunctions::AddMappings(Engine* engine) const { + std::vector args; + auto types = engine->types(); + + // gdv_fn_time_with_zone + args = {types->ptr_type(types->i32_type()), // time fields + types->i8_ptr_type(), // const char* zone + types->i32_type(), // int data_len + types->i64_type()}; // timestamp *ret_time + + engine->AddGlobalMappingForFunc("gdv_fn_time_with_zone", + types->i32_type() /*return_type*/, args, + reinterpret_cast(gdv_fn_time_with_zone)); +} + +} // namespace gandiva +#endif // !GANDIVA_UNIT_TEST + +extern "C" { + +// TODO: Do input validation, or make sure the callers do that?
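+// Illustrative parameter notes (not in the original patch): time_fields is +// indexed by gandiva::TimeFields (kYear, kMonth, kDay, kHours, kMinutes, +// kSeconds, kSubSeconds); zone points to a not-necessarily-null-terminated +// IANA zone name of zone_len bytes. Returns 0 on success, EINVAL if the zone +// lookup or the conversion throws.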
+int gdv_fn_time_with_zone(int* time_fields, const char* zone, int zone_len, + int64_t* ret_time) { + using arrow::util::date::day; + using arrow::util::date::local_days; + using arrow::util::date::locate_zone; + using arrow::util::date::month; + using arrow::util::date::time_zone; + using arrow::util::date::year; + using std::chrono::hours; + using std::chrono::milliseconds; + using std::chrono::minutes; + using std::chrono::seconds; + + using gandiva::TimeFields; + try { + const time_zone* tz = locate_zone(std::string(zone, zone_len)); + *ret_time = tz->to_sys(local_days(year(time_fields[TimeFields::kYear]) / + month(time_fields[TimeFields::kMonth]) / + day(time_fields[TimeFields::kDay])) + + hours(time_fields[TimeFields::kHours]) + + minutes(time_fields[TimeFields::kMinutes]) + + seconds(time_fields[TimeFields::kSeconds]) + + milliseconds(time_fields[TimeFields::kSubSeconds])) + .time_since_epoch() + .count(); + } catch (...) { + return EINVAL; + } + + return 0; +} + +} // extern "C" diff --git a/cpp/src/gandiva/compiled_expr.h b/cpp/src/gandiva/compiled_expr.h index 2f23971f366d3..b7799f18928e0 100644 --- a/cpp/src/gandiva/compiled_expr.h +++ b/cpp/src/gandiva/compiled_expr.h @@ -18,7 +18,7 @@ #ifndef GANDIVA_COMPILED_EXPR_H #define GANDIVA_COMPILED_EXPR_H -#include +#include "gandiva/llvm_includes.h" #include "gandiva/value_validity_pair.h" namespace gandiva { diff --git a/cpp/src/gandiva/configuration.h b/cpp/src/gandiva/configuration.h index 04e2eed287e13..480a95e9274ee 100644 --- a/cpp/src/gandiva/configuration.h +++ b/cpp/src/gandiva/configuration.h @@ -15,16 +15,18 @@ // specific language governing permissions and limitations // under the License. -#ifndef GANDIVA_CONFIGURATION_H -#define GANDIVA_CONFIGURATION_H +#pragma once #include #include #include "arrow/status.h" +#include "gandiva/visibility.h" + namespace gandiva { +GANDIVA_EXPORT extern const char kByteCodeFilePath[]; class ConfigurationBuilder; @@ -32,7 +34,7 @@ class ConfigurationBuilder; /// /// It contains elements to customize gandiva execution /// at run time. -class Configuration { +class GANDIVA_EXPORT Configuration { public: friend class ConfigurationBuilder; @@ -53,7 +55,7 @@ class Configuration { /// /// Provides a default configuration and convenience methods /// to override specific values and build a custom instance -class ConfigurationBuilder { +class GANDIVA_EXPORT ConfigurationBuilder { public: ConfigurationBuilder() : byte_code_file_path_(kByteCodeFilePath) {} @@ -83,4 +85,3 @@ class ConfigurationBuilder { }; } // namespace gandiva -#endif // GANDIVA_CONFIGURATION_H diff --git a/cpp/src/gandiva/date_utils.cc b/cpp/src/gandiva/date_utils.cc index 2686b193500ff..f0a80d3c95921 100644 --- a/cpp/src/gandiva/date_utils.cc +++ b/cpp/src/gandiva/date_utils.cc @@ -16,6 +16,7 @@ // under the License. 
#include +#include #include #include #include @@ -57,7 +58,7 @@ Status DateUtils::ToInternalFormat(const std::string& format, std::stringstream buffer; bool is_in_quoted_text = false; - for (uint i = 0; i < format.length(); i++) { + for (size_t i = 0; i < format.size(); i++) { char currentChar = format[i]; // logic before we append to the buffer @@ -75,11 +76,8 @@ Status DateUtils::ToInternalFormat(const std::string& format, buffer.str(""); continue; } else { - if (buffer.str().length() > 0) { - std::stringstream err_msg; - err_msg << "Invalid date format string '" << format << "' at position " << i; - return Status::Invalid(err_msg.str()); - } + ARROW_RETURN_IF(buffer.str().length() > 0, + Status::Invalid("Invalid date format string '", format, "'")); is_in_quoted_text = true; continue; @@ -156,10 +154,7 @@ Status DateUtils::ToInternalFormat(const std::string& format, } } } else { - // no potential matches found - std::stringstream err_msg; - err_msg << "Invalid date format string '" << format << "' at position " << i; - return Status::Invalid(err_msg.str()); + return Status::Invalid("Invalid date format string '", format, "'"); } } @@ -170,11 +165,10 @@ Status DateUtils::ToInternalFormat(const std::string& format, if (exactMatches.size() == 1 && exactMatches[0].length() == buffer.str().length()) { builder << sql_date_format_to_boost_map_[exactMatches[0]]; } else { - // we didn't successfully parse the entire string + // Format partially parsed int64_t pos = format.length() - buffer.str().length(); - std::stringstream err_msg; - err_msg << "Invalid date format string '" << format << "' at position " << pos; - return Status::Invalid(err_msg.str()); + return Status::Invalid("Invalid date format string '", format, "' at position ", + pos); } } std::string final_pattern = builder.str(); diff --git a/cpp/src/gandiva/date_utils.h b/cpp/src/gandiva/date_utils.h index 64a150b6ba72d..e87203bd017ee 100644 --- a/cpp/src/gandiva/date_utils.h +++ b/cpp/src/gandiva/date_utils.h @@ -23,12 +23,22 @@ #include #include +#if defined(_MSC_VER) +#include +#include +#include +#endif + +#include "arrow/util/macros.h" +#include "arrow/vendored/datetime.h" + #include "gandiva/arrow.h" +#include "gandiva/visibility.h" namespace gandiva { /// \brief Utility class for converting sql date patterns to internal date patterns. -class DateUtils { +class GANDIVA_EXPORT DateUtils { public: static Status ToInternalFormat(const std::string& format, std::shared_ptr* internal_format); @@ -47,6 +57,55 @@ class DateUtils { static std::vector GetExactMatches(const std::string& pattern); }; +namespace internal { + +/// \brief Returns seconds since the UNIX epoch +static inline bool ParseTimestamp(const char* buf, const char* format, + bool ignoreTimeInDay, int64_t* out) { +#if defined(_MSC_VER) + static std::locale lc_all(setlocale(LC_ALL, NULLPTR)); + std::istringstream stream(buf); + stream.imbue(lc_all); + + // TODO: date::parse fails parsing when the hour value is 0. 
+ // e.g. 1886-12-01 00:00:00
+ arrow::util::date::sys_seconds seconds;
+ if (ignoreTimeInDay) {
+ arrow::util::date::sys_days days;
+ stream >> arrow::util::date::parse(format, days);
+ if (stream.fail()) {
+ return false;
+ }
+ seconds = days;
+ } else {
+ stream >> arrow::util::date::parse(format, seconds);
+ if (stream.fail()) {
+ return false;
+ }
+ }
+ auto seconds_in_epoch = seconds.time_since_epoch().count();
+ *out = seconds_in_epoch;
+ return true;
+#else
+ struct tm result;
+ char* ret = strptime(buf, format, &result);
+ if (ret == NULLPTR) {
+ return false;
+ }
+ // ignore the time part
+ arrow::util::date::sys_seconds secs =
+ arrow::util::date::sys_days(arrow::util::date::year(result.tm_year + 1900) /
+ (result.tm_mon + 1) / result.tm_mday);
+ if (!ignoreTimeInDay) {
+ secs += (std::chrono::hours(result.tm_hour) + std::chrono::minutes(result.tm_min) +
+ std::chrono::seconds(result.tm_sec));
+ }
+ *out = secs.time_since_epoch().count();
+ return true;
+#endif
+}
+
+} // namespace internal
} // namespace gandiva
#endif // TO_DATE_HELPER_H
diff --git a/cpp/src/gandiva/decimal_ir.cc b/cpp/src/gandiva/decimal_ir.cc
new file mode 100644
index 0000000000000..d10158a6f0487
--- /dev/null
+++ b/cpp/src/gandiva/decimal_ir.cc
@@ -0,0 +1,401 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include
+#include
+
+#include "arrow/status.h"
+#include "gandiva/decimal_ir.h"
+#include "gandiva/decimal_type_util.h"
+
+// Algorithms adapted from Apache Impala
+
+namespace gandiva {
+
+#define ADD_TRACE_32(msg, value) \
+ if (enable_ir_traces_) { \
+ AddTrace32(msg, value); \
+ }
+#define ADD_TRACE_128(msg, value) \
+ if (enable_ir_traces_) { \
+ AddTrace128(msg, value); \
+ }
+
+const char* DecimalIR::kScaleMultipliersName = "gandivaScaleMultipliers";
+
+/// Populate globals required by decimal IR.
+/// TODO: can this be done just once?
+void DecimalIR::AddGlobals(Engine* engine) {
+ auto types = engine->types();
+
+ // populate the vector: [1, 10, 100, 1000, ...]
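+ // (In C++ terms, the IR global built below models a table like
+ //    static const __int128 kScaleMultipliers[39] = {1, 10, 100, /*...*/};
+ //  with one entry per scale 0..kMaxPrecision, so that GetScaleMultiplier(s)
+ //  compiles down to a single indexed load. Sketch only, for intuition.)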
+ std::string value = "1"; + std::vector scale_multipliers; + for (int i = 0; i < DecimalTypeUtil::kMaxPrecision + 1; ++i) { + auto multiplier = + llvm::ConstantInt::get(llvm::Type::getInt128Ty(*engine->context()), value, 10); + scale_multipliers.push_back(multiplier); + value.append("0"); + } + + auto array_type = + llvm::ArrayType::get(types->i128_type(), DecimalTypeUtil::kMaxPrecision + 1); + auto initializer = llvm::ConstantArray::get( + array_type, llvm::ArrayRef(scale_multipliers)); + + auto globalScaleMultipliers = new llvm::GlobalVariable( + *engine->module(), array_type, true /*constant*/, + llvm::GlobalValue::LinkOnceAnyLinkage, initializer, kScaleMultipliersName); + globalScaleMultipliers->setAlignment(16); +} + +// Lookup intrinsic functions +void DecimalIR::InitializeIntrinsics() { + sadd_with_overflow_fn_ = llvm::Intrinsic::getDeclaration( + module(), llvm::Intrinsic::sadd_with_overflow, types()->i128_type()); + DCHECK_NE(sadd_with_overflow_fn_, nullptr); + + smul_with_overflow_fn_ = llvm::Intrinsic::getDeclaration( + module(), llvm::Intrinsic::smul_with_overflow, types()->i128_type()); + DCHECK_NE(smul_with_overflow_fn_, nullptr); + + i128_with_overflow_struct_type_ = + sadd_with_overflow_fn_->getFunctionType()->getReturnType(); +} + +// CPP: return kScaleMultipliers[scale] +llvm::Value* DecimalIR::GetScaleMultiplier(llvm::Value* scale) { + auto const_array = module()->getGlobalVariable(kScaleMultipliersName); + auto ptr = ir_builder()->CreateGEP(const_array, {types()->i32_constant(0), scale}); + return ir_builder()->CreateLoad(ptr); +} + +// CPP: x <= y ? y : x +llvm::Value* DecimalIR::GetHigherScale(llvm::Value* x_scale, llvm::Value* y_scale) { + llvm::Value* le = ir_builder()->CreateICmpSLE(x_scale, y_scale); + return ir_builder()->CreateSelect(le, y_scale, x_scale); +} + +// CPP: return (increase_scale_by <= 0) ? +// in_value : in_value * GetScaleMultiplier(increase_scale_by) +llvm::Value* DecimalIR::IncreaseScale(llvm::Value* in_value, + llvm::Value* increase_scale_by) { + llvm::Value* le_zero = + ir_builder()->CreateICmpSLE(increase_scale_by, types()->i32_constant(0)); + // then block + auto then_lambda = [&] { return in_value; }; + + // else block + auto else_lambda = [&] { + llvm::Value* multiplier = GetScaleMultiplier(increase_scale_by); + return ir_builder()->CreateMul(in_value, multiplier); + }; + + return BuildIfElse(le_zero, types()->i128_type(), then_lambda, else_lambda); +} + +// CPP: return (increase_scale_by <= 0) ? +// {in_value,false} : {in_value * GetScaleMultiplier(increase_scale_by),true} +// +// The return value also indicates if there was an overflow while increasing the scale. +DecimalIR::ValueWithOverflow DecimalIR::IncreaseScaleWithOverflowCheck( + llvm::Value* in_value, llvm::Value* increase_scale_by) { + llvm::Value* le_zero = + ir_builder()->CreateICmpSLE(increase_scale_by, types()->i32_constant(0)); + + // then block + auto then_lambda = [&] { + ValueWithOverflow ret{in_value, types()->false_constant()}; + return ret.AsStruct(this); + }; + + // else block + auto else_lambda = [&] { + llvm::Value* multiplier = GetScaleMultiplier(increase_scale_by); + return ir_builder()->CreateCall(smul_with_overflow_fn_, {in_value, multiplier}); + }; + + auto ir_struct = + BuildIfElse(le_zero, i128_with_overflow_struct_type_, then_lambda, else_lambda); + return ValueWithOverflow::MakeFromStruct(this, ir_struct); +} + +// CPP: return (reduce_scale_by <= 0) ? 
+// in_value : in_value / GetScaleMultiplier(reduce_scale_by)
+//
+// ReduceScale cannot cause an overflow.
+llvm::Value* DecimalIR::ReduceScale(llvm::Value* in_value, llvm::Value* reduce_scale_by) {
+ auto le_zero = ir_builder()->CreateICmpSLE(reduce_scale_by, types()->i32_constant(0));
+ // then block
+ auto then_lambda = [&] { return in_value; };
+
+ // else block
+ auto else_lambda = [&] {
+ // TODO: handle rounding.
+ llvm::Value* multiplier = GetScaleMultiplier(reduce_scale_by);
+ return ir_builder()->CreateSDiv(in_value, multiplier);
+ };
+
+ return BuildIfElse(le_zero, types()->i128_type(), then_lambda, else_lambda);
+}
+
+/// @brief Fast-path for add
+/// Adjust x and y to the same scale, and add them.
+llvm::Value* DecimalIR::AddFastPath(const ValueFull& x, const ValueFull& y) {
+ auto higher_scale = GetHigherScale(x.scale(), y.scale());
+ ADD_TRACE_32("AddFastPath : higher_scale", higher_scale);
+
+ // CPP : x_scaled = IncreaseScale(x_value, higher_scale - x_scale)
+ auto x_delta = ir_builder()->CreateSub(higher_scale, x.scale());
+ auto x_scaled = IncreaseScale(x.value(), x_delta);
+ ADD_TRACE_128("AddFastPath : x_scaled", x_scaled);
+
+ // CPP : y_scaled = IncreaseScale(y_value, higher_scale - y_scale)
+ auto y_delta = ir_builder()->CreateSub(higher_scale, y.scale());
+ auto y_scaled = IncreaseScale(y.value(), y_delta);
+ ADD_TRACE_128("AddFastPath : y_scaled", y_scaled);
+
+ auto sum = ir_builder()->CreateAdd(x_scaled, y_scaled);
+ ADD_TRACE_128("AddFastPath : sum", sum);
+ return sum;
+}
+
+/// @brief Add with overflow check.
+/// Adjust x and y to the same scale, add them, and reduce sum to output scale.
+/// If there is an overflow, the sum is set to 0.
+DecimalIR::ValueWithOverflow DecimalIR::AddWithOverflowCheck(const ValueFull& x,
+ const ValueFull& y,
+ const ValueFull& out) {
+ auto higher_scale = GetHigherScale(x.scale(), y.scale());
+ ADD_TRACE_32("AddWithOverflowCheck : higher_scale", higher_scale);
+
+ // CPP : x_scaled = IncreaseScale(x_value, higher_scale - x.scale())
+ auto x_delta = ir_builder()->CreateSub(higher_scale, x.scale());
+ auto x_scaled = IncreaseScaleWithOverflowCheck(x.value(), x_delta);
+ ADD_TRACE_128("AddWithOverflowCheck : x_scaled", x_scaled.value());
+
+ // CPP : y_scaled = IncreaseScale(y_value, higher_scale - y_scale)
+ auto y_delta = ir_builder()->CreateSub(higher_scale, y.scale());
+ auto y_scaled = IncreaseScaleWithOverflowCheck(y.value(), y_delta);
+ ADD_TRACE_128("AddWithOverflowCheck : y_scaled", y_scaled.value());
+
+ // CPP : sum = x_scaled + y_scaled
+ auto sum_ir_struct = ir_builder()->CreateCall(sadd_with_overflow_fn_,
+ {x_scaled.value(), y_scaled.value()});
+ auto sum = ValueWithOverflow::MakeFromStruct(this, sum_ir_struct);
+ ADD_TRACE_128("AddWithOverflowCheck : sum", sum.value());
+
+ // CPP : overflow ? 0 : sum / GetScaleMultiplier(max_scale - out_scale)
+ auto overflow = GetCombinedOverflow({x_scaled, y_scaled, sum});
+ ADD_TRACE_32("AddWithOverflowCheck : overflow", overflow);
+ auto then_lambda = [&] {
+ // if there is an overflow, the value returned won't be used, so skip the division.
+ return types()->i128_constant(0);
+ };
+ auto else_lambda = [&] {
+ auto reduce_scale_by = ir_builder()->CreateSub(higher_scale, out.scale());
+ return ReduceScale(sum.value(), reduce_scale_by);
+ };
+ auto sum_descaled =
+ BuildIfElse(overflow, types()->i128_type(), then_lambda, else_lambda);
+ return ValueWithOverflow(sum_descaled, overflow);
+}
+
+// This is pretty complex, so use CPP fns.
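+// (Rather than emitting the slow-path arithmetic as IR, AddLarge splits each
+// i128 operand into two i64 halves and calls the precompiled C++ helper
+// add_large_decimal128_decimal128, as the call below shows.)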
+llvm::Value* DecimalIR::AddLarge(const ValueFull& x, const ValueFull& y,
+ const ValueFull& out) {
+ auto block = ir_builder()->GetInsertBlock();
+ auto out_high_ptr = new llvm::AllocaInst(types()->i64_type(), 0, "out_hi", block);
+ auto out_low_ptr = new llvm::AllocaInst(types()->i64_type(), 0, "out_low", block);
+ auto x_split = ValueSplit::MakeFromInt128(this, x.value());
+ auto y_split = ValueSplit::MakeFromInt128(this, y.value());
+
+ std::vector<llvm::Value*> args = {
+ x_split.high(), x_split.low(), x.precision(), x.scale(),
+ y_split.high(), y_split.low(), y.precision(), y.scale(),
+ out.precision(), out.scale(), out_high_ptr, out_low_ptr,
+ };
+ ir_builder()->CreateCall(module()->getFunction("add_large_decimal128_decimal128"),
+ args);
+
+ auto out_high = ir_builder()->CreateLoad(out_high_ptr);
+ auto out_low = ir_builder()->CreateLoad(out_low_ptr);
+ auto sum = ValueSplit(out_high, out_low).AsInt128(this);
+ ADD_TRACE_128("AddLarge : sum", sum);
+ return sum;
+}
+
+/// The output scale/precision cannot be arbitrary values. The algorithm here depends
+/// on them being the same as those computed in DecimalTypeUtil.
+/// TODO: enforce this.
+Status DecimalIR::BuildAdd() {
+ // Create fn prototype:
+ // int128_t
+ // add_decimal128_decimal128(int128_t x_value, int32_t x_precision, int32_t x_scale,
+ // int128_t y_value, int32_t y_precision, int32_t y_scale,
+ // int32_t out_precision, int32_t out_scale)
+ auto i32 = types()->i32_type();
+ auto i128 = types()->i128_type();
+ auto function = BuildFunction("add_decimal128_decimal128", i128,
+ {
+ {"x_value", i128},
+ {"x_precision", i32},
+ {"x_scale", i32},
+ {"y_value", i128},
+ {"y_precision", i32},
+ {"y_scale", i32},
+ {"out_precision", i32},
+ {"out_scale", i32},
+ });
+
+ auto arg_iter = function->arg_begin();
+ ValueFull x(&arg_iter[0], &arg_iter[1], &arg_iter[2]);
+ ValueFull y(&arg_iter[3], &arg_iter[4], &arg_iter[5]);
+ ValueFull out(nullptr, &arg_iter[6], &arg_iter[7]);
+
+ auto entry = llvm::BasicBlock::Create(*context(), "entry", function);
+ ir_builder()->SetInsertPoint(entry);
+
+ // CPP :
+ // if (out_precision < 38) {
+ // return AddFastPath(x, y)
+ // } else {
+ // ret = AddWithOverflowCheck(x, y)
+ // if (ret.overflow)
+ // return AddLarge(x, y)
+ // else
+ // return ret.value;
+ // }
+ llvm::Value* lt_max_precision = ir_builder()->CreateICmpSLT(
+ out.precision(), types()->i32_constant(DecimalTypeUtil::kMaxPrecision));
+ auto then_lambda = [&] {
+ // fast-path add
+ return AddFastPath(x, y);
+ };
+ auto else_lambda = [&] {
+ if (kUseOverflowIntrinsics) {
+ // do the add and check if there was overflow
+ auto ret = AddWithOverflowCheck(x, y, out);
+
+ // if there is an overflow, switch to the AddLarge codepath.
+ return BuildIfElse(ret.overflow(), types()->i128_type(),
+ [&] { return AddLarge(x, y, out); },
+ [&] { return ret.value(); });
+ } else {
+ return AddLarge(x, y, out);
+ }
+ };
+ auto value =
+ BuildIfElse(lt_max_precision, types()->i128_type(), then_lambda, else_lambda);
+
+ // store result to out
+ ir_builder()->CreateRet(value);
+ return Status::OK();
+}
+
+Status DecimalIR::AddFunctions(Engine* engine) {
+ auto decimal_ir = std::make_shared<DecimalIR>(engine);
+
+ // Populate global variables used by decimal operations.
+ decimal_ir->AddGlobals(engine);
+
+ // Lookup intrinsic functions
+ decimal_ir->InitializeIntrinsics();
+
+ // build "add"
+ return decimal_ir->BuildAdd();
+}
+
+// Do a bitwise-or of all the overflow bits.
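+// e.g. on the add path: combined = x_scaled.overflow | y_scaled.overflow |
+// sum.overflow, so a single branch covers every step that could overflow.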
+llvm::Value* DecimalIR::GetCombinedOverflow( + std::vector vec) { + llvm::Value* res = types()->false_constant(); + for (auto& val : vec) { + res = ir_builder()->CreateOr(res, val.overflow()); + } + return res; +} + +DecimalIR::ValueSplit DecimalIR::ValueSplit::MakeFromInt128(DecimalIR* decimal_ir, + llvm::Value* in) { + auto builder = decimal_ir->ir_builder(); + auto types = decimal_ir->types(); + + auto high = builder->CreateLShr(in, types->i128_constant(64)); + high = builder->CreateTrunc(high, types->i64_type()); + auto low = builder->CreateTrunc(in, types->i64_type()); + return ValueSplit(high, low); +} + +/// Convert IR struct {%i64, %i64} to cpp class ValueSplit +DecimalIR::ValueSplit DecimalIR::ValueSplit::MakeFromStruct(DecimalIR* decimal_ir, + llvm::Value* dstruct) { + auto builder = decimal_ir->ir_builder(); + auto high = builder->CreateExtractValue(dstruct, 0); + auto low = builder->CreateExtractValue(dstruct, 1); + return DecimalIR::ValueSplit(high, low); +} + +llvm::Value* DecimalIR::ValueSplit::AsInt128(DecimalIR* decimal_ir) const { + auto builder = decimal_ir->ir_builder(); + auto types = decimal_ir->types(); + + auto value = builder->CreateSExt(high_, types->i128_type()); + value = builder->CreateShl(value, types->i128_constant(64)); + value = builder->CreateAdd(value, builder->CreateZExt(low_, types->i128_type())); + return value; +} + +/// Convert IR struct {%i128, %i1} to cpp class ValueWithOverflow +DecimalIR::ValueWithOverflow DecimalIR::ValueWithOverflow::MakeFromStruct( + DecimalIR* decimal_ir, llvm::Value* dstruct) { + auto builder = decimal_ir->ir_builder(); + auto value = builder->CreateExtractValue(dstruct, 0); + auto overflow = builder->CreateExtractValue(dstruct, 1); + return DecimalIR::ValueWithOverflow(value, overflow); +} + +/// Convert to IR struct {%i128, %i1} +llvm::Value* DecimalIR::ValueWithOverflow::AsStruct(DecimalIR* decimal_ir) const { + auto builder = decimal_ir->ir_builder(); + + auto undef = llvm::UndefValue::get(decimal_ir->i128_with_overflow_struct_type_); + auto struct_val = builder->CreateInsertValue(undef, value(), 0); + return builder->CreateInsertValue(struct_val, overflow(), 1); +} + +/// debug traces +void DecimalIR::AddTrace(const std::string& fmt, std::vector args) { + DCHECK(enable_ir_traces_); + + auto ir_str = ir_builder()->CreateGlobalStringPtr(fmt); + args.insert(args.begin(), ir_str); + ir_builder()->CreateCall(module()->getFunction("printf"), args, "trace"); +} + +void DecimalIR::AddTrace32(const std::string& msg, llvm::Value* value) { + AddTrace("DECIMAL_IR_TRACE:: " + msg + " %d\n", {value}); +} + +void DecimalIR::AddTrace128(const std::string& msg, llvm::Value* value) { + // convert i128 into two i64s for printing + auto split = ValueSplit::MakeFromInt128(this, value); + AddTrace("DECIMAL_IR_TRACE:: " + msg + " %llx:%llx (%lld:%llu)\n", + {split.high(), split.low(), split.high(), split.low()}); +} + +} // namespace gandiva diff --git a/cpp/src/gandiva/decimal_ir.h b/cpp/src/gandiva/decimal_ir.h new file mode 100644 index 0000000000000..fae762c362d94 --- /dev/null +++ b/cpp/src/gandiva/decimal_ir.h @@ -0,0 +1,171 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef GANDIVA_DECIMAL_ADD_IR_BUILDER_H
+#define GANDIVA_DECIMAL_ADD_IR_BUILDER_H
+
+#include
+#include
+#include
+
+#include "gandiva/function_ir_builder.h"
+
+namespace gandiva {
+
+/// @brief Decimal IR functions
+class DecimalIR : public FunctionIRBuilder {
+ public:
+ explicit DecimalIR(Engine* engine)
+ : FunctionIRBuilder(engine), enable_ir_traces_(false) {}
+
+ /// Build decimal IR functions and add them to the engine.
+ static Status AddFunctions(Engine* engine);
+
+ void EnableTraces() { enable_ir_traces_ = true; }
+
+ private:
+ /// The intrinsic fn for divide with small divisors is about 10x slower, so not
+ /// using these.
+ static const bool kUseOverflowIntrinsics = false;
+
+ // Holder for an i128 value, along with its scale and precision.
+ class ValueFull {
+ public:
+ ValueFull(llvm::Value* value, llvm::Value* precision, llvm::Value* scale)
+ : value_(value), precision_(precision), scale_(scale) {}
+
+ llvm::Value* value() const { return value_; }
+ llvm::Value* precision() const { return precision_; }
+ llvm::Value* scale() const { return scale_; }
+
+ private:
+ llvm::Value* value_;
+ llvm::Value* precision_;
+ llvm::Value* scale_;
+ };
+
+ // Holder for an i128 value, and a boolean indicating overflow.
+ class ValueWithOverflow {
+ public:
+ ValueWithOverflow(llvm::Value* value, llvm::Value* overflow)
+ : value_(value), overflow_(overflow) {}
+
+ // Make from IR struct
+ static ValueWithOverflow MakeFromStruct(DecimalIR* decimal_ir, llvm::Value* dstruct);
+
+ // Build a corresponding IR struct
+ llvm::Value* AsStruct(DecimalIR* decimal_ir) const;
+
+ llvm::Value* value() const { return value_; }
+ llvm::Value* overflow() const { return overflow_; }
+
+ private:
+ llvm::Value* value_;
+ llvm::Value* overflow_;
+ };
+
+ // Holder for an i128 value that is split into two i64s
+ class ValueSplit {
+ public:
+ ValueSplit(llvm::Value* high, llvm::Value* low) : high_(high), low_(low) {}
+
+ // Make from i128 value
+ static ValueSplit MakeFromInt128(DecimalIR* decimal_ir, llvm::Value* in);
+
+ // Make from IR struct
+ static ValueSplit MakeFromStruct(DecimalIR* decimal_ir, llvm::Value* dstruct);
+
+ // Combine the two parts into an i128
+ llvm::Value* AsInt128(DecimalIR* decimal_ir) const;
+
+ llvm::Value* high() const { return high_; }
+ llvm::Value* low() const { return low_; }
+
+ private:
+ llvm::Value* high_;
+ llvm::Value* low_;
+ };
+
+ // Add global variables to the module.
+ static void AddGlobals(Engine* engine);
+
+ // Initialize intrinsic functions that are used by decimal operations.
+ void InitializeIntrinsics();
+
+ // Create IR builder for decimal add function.
+ static Status MakeAdd(Engine* engine, std::shared_ptr* out);
+
+ // Get the multiplier for specified scale (i.e. 10^scale)
+ llvm::Value* GetScaleMultiplier(llvm::Value* scale);
+
+ // Get the higher of the two scales
+ llvm::Value* GetHigherScale(llvm::Value* x_scale, llvm::Value* y_scale);
+
+ // Increase scale of 'in_value' by 'increase_scale_by'.
+ // - If 'increase_scale_by' is <= 0, does nothing.
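+ // - Otherwise multiplies by 10^increase_scale_by; e.g. the unscaled value
+ //   12345 (123.45 at scale 2) increased by 3 becomes 12345000 (scale 5).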
+ llvm::Value* IncreaseScale(llvm::Value* in_value, llvm::Value* increase_scale_by);
+
+ // Similar to IncreaseScale, but also checks if there is overflow.
+ ValueWithOverflow IncreaseScaleWithOverflowCheck(llvm::Value* in_value,
+ llvm::Value* increase_scale_by);
+
+ // Reduce scale of 'in_value' by 'reduce_scale_by'.
+ // - If 'reduce_scale_by' is <= 0, does nothing.
+ llvm::Value* ReduceScale(llvm::Value* in_value, llvm::Value* reduce_scale_by);
+
+ // Fast path of add: guaranteed no overflow
+ llvm::Value* AddFastPath(const ValueFull& x, const ValueFull& y);
+
+ // Similar to AddFastPath, but check if there's an overflow.
+ ValueWithOverflow AddWithOverflowCheck(const ValueFull& x, const ValueFull& y,
+ const ValueFull& out);
+
+ // Do addition of large integers (both positive and negative).
+ llvm::Value* AddLarge(const ValueFull& x, const ValueFull& y, const ValueFull& out);
+
+ // Get the combined overflow (logical or).
+ llvm::Value* GetCombinedOverflow(std::vector<ValueWithOverflow> values);
+
+ // Build the function for adding decimals.
+ Status BuildAdd();
+
+ // Add a trace in IR code.
+ void AddTrace(const std::string& fmt, std::vector<llvm::Value*> args);
+
+ // Add a trace msg along with a 32-bit integer.
+ void AddTrace32(const std::string& msg, llvm::Value* value);
+
+ // Add a trace msg along with a 128-bit integer.
+ void AddTrace128(const std::string& msg, llvm::Value* value);
+
+ // Name of the global variable holding the array of scale multipliers.
+ static const char* kScaleMultipliersName;
+
+ // Intrinsic functions
+ llvm::Function* sadd_with_overflow_fn_;
+ llvm::Function* smul_with_overflow_fn_;
+
+ // struct { i128: value, i1: overflow}
+ llvm::Type* i128_with_overflow_struct_type_;
+
+ // If set to true, IR traces are enabled. Useful for debugging.
+ bool enable_ir_traces_;
+};
+
+} // namespace gandiva
+
+#endif // GANDIVA_DECIMAL_ADD_IR_BUILDER_H
diff --git a/cpp/src/gandiva/decimal_scalar.h b/cpp/src/gandiva/decimal_scalar.h
new file mode 100644
index 0000000000000..5b38770da632a
--- /dev/null
+++ b/cpp/src/gandiva/decimal_scalar.h
@@ -0,0 +1,54 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include
+#include
+#include
+#include "arrow/util/decimal.h"
+#include "gandiva/basic_decimal_scalar.h"
+
+namespace gandiva {
+
+using Decimal128 = arrow::Decimal128;
+
+/// Represents a 128-bit decimal value along with its precision and scale.
+///
+/// BasicDecimalScalar128 can be safely compiled to IR without references to libstdc++.
+/// This class has additional functionality on top of BasicDecimalScalar128 to deal with
+/// strings and streams.
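+/// For example (illustrative): DecimalScalar128("12345", 7, 2) holds the unscaled
+/// value 12345 with precision 7 and scale 2, and its ToString() below renders it
+/// as "12345,7,2".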
+class DecimalScalar128 : public BasicDecimalScalar128 { + public: + using BasicDecimalScalar128::BasicDecimalScalar128; + + DecimalScalar128(const std::string& value, int32_t precision, int32_t scale) + : BasicDecimalScalar128(Decimal128(value), precision, scale) {} + + inline std::string ToString() const { + Decimal128 dvalue(value()); + return dvalue.ToString(0) + "," + std::to_string(precision()) + "," + + std::to_string(scale()); + } + + friend std::ostream& operator<<(std::ostream& os, const DecimalScalar128& dec) { + os << dec.ToString(); + return os; + } +}; + +} // namespace gandiva diff --git a/cpp/src/gandiva/decimal_type_util.cc b/cpp/src/gandiva/decimal_type_util.cc new file mode 100644 index 0000000000000..74c9326176373 --- /dev/null +++ b/cpp/src/gandiva/decimal_type_util.cc @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "gandiva/decimal_type_util.h" +#include "gandiva/logging.h" + +namespace gandiva { + +constexpr int32_t DecimalTypeUtil::kMinAdjustedScale; + +#define DCHECK_TYPE(type) \ + { \ + DCHECK_GE(type->scale(), 0); \ + DCHECK_LE(type->precision(), kMaxPrecision); \ + } + +// Implementation of decimal rules. +Status DecimalTypeUtil::GetResultType(Op op, const Decimal128TypeVector& in_types, + Decimal128TypePtr* out_type) { + DCHECK_EQ(in_types.size(), 2); + + *out_type = nullptr; + auto t1 = in_types[0]; + auto t2 = in_types[1]; + DCHECK_TYPE(t1); + DCHECK_TYPE(t2); + + int32_t s1 = t1->scale(); + int32_t s2 = t2->scale(); + int32_t p1 = t1->precision(); + int32_t p2 = t2->precision(); + int32_t result_scale = 0; + int32_t result_precision = 0; + + switch (op) { + case kOpAdd: + case kOpSubtract: + result_scale = std::max(s1, s2); + result_precision = std::max(p1 - s1, p2 - s2) + result_scale + 1; + break; + + case kOpMultiply: + result_scale = s1 + s2; + result_precision = p1 + p2 + 1; + break; + + case kOpDivide: + result_scale = std::max(kMinAdjustedScale, s1 + p2 + 1); + result_precision = p1 - s1 + s2 + result_scale; + break; + + case kOpMod: + result_scale = std::max(s1, s2); + result_precision = std::min(p1 - s1, p2 - s2) + result_scale; + break; + } + *out_type = MakeAdjustedType(result_precision, result_scale); + return Status::OK(); +} + +} // namespace gandiva diff --git a/cpp/src/gandiva/decimal_type_util.h b/cpp/src/gandiva/decimal_type_util.h new file mode 100644 index 0000000000000..aa3c255bb6948 --- /dev/null +++ b/cpp/src/gandiva/decimal_type_util.h @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Adapted from Apache Impala
+
+#ifndef GANDIVA_DECIMAL_TYPE_SQL_H
+#define GANDIVA_DECIMAL_TYPE_SQL_H
+
+#include
+#include
+
+#include "gandiva/arrow.h"
+#include "gandiva/visibility.h"
+
+namespace gandiva {
+
+/// @brief Handles conversion of scale/precision for operations on decimal types.
+/// TODO: do validations for all of these.
+class GANDIVA_EXPORT DecimalTypeUtil {
+ public:
+ enum Op {
+ kOpAdd,
+ kOpSubtract,
+ kOpMultiply,
+ kOpDivide,
+ kOpMod,
+ };
+
+ /// The maximum precision representable by a 4-byte decimal
+ static constexpr int32_t kMaxDecimal32Precision = 9;
+
+ /// The maximum precision representable by an 8-byte decimal
+ static constexpr int32_t kMaxDecimal64Precision = 18;
+
+ /// The maximum precision representable by a 16-byte decimal
+ static constexpr int32_t kMaxPrecision = 38;
+
+ // The maximum scale representable.
+ static constexpr int32_t kMaxScale = kMaxPrecision;
+
+ // When operating on decimal inputs, the integer part of the output can exceed the
+ // max precision. In such cases, the scale can be reduced, up to a minimum of
+ // kMinAdjustedScale.
+ // * There is no strong reason for 6, but both SQLServer and Impala use 6 too.
+ static constexpr int32_t kMinAdjustedScale = 6;
+
+ // For specified operation and input scale/precision, determine the output
+ // scale/precision.
+ static Status GetResultType(Op op, const Decimal128TypeVector& in_types,
+ Decimal128TypePtr* out_type);
+
+ static Decimal128TypePtr MakeType(int32_t precision, int32_t scale);
+
+ private:
+ // Reduce the scale if possible so that precision stays <= kMaxPrecision
+ static Decimal128TypePtr MakeAdjustedType(int32_t precision, int32_t scale) {
+ if (precision > kMaxPrecision) {
+ int32_t min_scale = std::min(scale, kMinAdjustedScale);
+ int32_t delta = precision - kMaxPrecision;
+ precision = kMaxPrecision;
+ scale = std::max(scale - delta, min_scale);
+ }
+ return MakeType(precision, scale);
+ }
+};
+
+inline Decimal128TypePtr DecimalTypeUtil::MakeType(int32_t precision, int32_t scale) {
+ return std::dynamic_pointer_cast<arrow::Decimal128Type>(
+ arrow::decimal(precision, scale));
+}
+
+} // namespace gandiva
+
+#endif // GANDIVA_DECIMAL_TYPE_SQL_H
diff --git a/cpp/src/gandiva/decimal_type_util_test.cc b/cpp/src/gandiva/decimal_type_util_test.cc
new file mode 100644
index 0000000000000..a593990638af5
--- /dev/null
+++ b/cpp/src/gandiva/decimal_type_util_test.cc
@@ -0,0 +1,58 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Adapted from Apache Impala + +#include + +#include "gandiva/decimal_type_util.h" +#include "tests/test_util.h" + +namespace gandiva { + +#define DECIMAL_TYPE(p, s) DecimalTypeUtil::MakeType(p, s) + +Decimal128TypePtr DoOp(DecimalTypeUtil::Op op, Decimal128TypePtr d1, + Decimal128TypePtr d2) { + Decimal128TypePtr ret_type; + EXPECT_OK(DecimalTypeUtil::GetResultType(op, {d1, d2}, &ret_type)); + return ret_type; +} + +TEST(DecimalResultTypes, Basic) { + EXPECT_ARROW_TYPE_EQUALS( + DECIMAL_TYPE(31, 10), + DoOp(DecimalTypeUtil::kOpAdd, DECIMAL_TYPE(30, 10), DECIMAL_TYPE(30, 10))); + + EXPECT_ARROW_TYPE_EQUALS( + DECIMAL_TYPE(32, 6), + DoOp(DecimalTypeUtil::kOpAdd, DECIMAL_TYPE(30, 6), DECIMAL_TYPE(30, 5))); + + EXPECT_ARROW_TYPE_EQUALS( + DECIMAL_TYPE(38, 9), + DoOp(DecimalTypeUtil::kOpAdd, DECIMAL_TYPE(30, 10), DECIMAL_TYPE(38, 10))); + + EXPECT_ARROW_TYPE_EQUALS( + DECIMAL_TYPE(38, 9), + DoOp(DecimalTypeUtil::kOpAdd, DECIMAL_TYPE(38, 10), DECIMAL_TYPE(38, 38))); + + EXPECT_ARROW_TYPE_EQUALS( + DECIMAL_TYPE(38, 6), + DoOp(DecimalTypeUtil::kOpAdd, DECIMAL_TYPE(38, 10), DECIMAL_TYPE(38, 2))); +} + +} // namespace gandiva diff --git a/cpp/src/gandiva/dex.h b/cpp/src/gandiva/dex.h index afce44ed12fee..894d9611058bd 100644 --- a/cpp/src/gandiva/dex.h +++ b/cpp/src/gandiva/dex.h @@ -32,11 +32,12 @@ #include "gandiva/literal_holder.h" #include "gandiva/native_function.h" #include "gandiva/value_validity_pair.h" +#include "gandiva/visibility.h" namespace gandiva { /// \brief Decomposed expression : the validity and value are separated. -class Dex { +class GANDIVA_EXPORT Dex { public: /// Derived classes should simply invoke the Visit api of the visitor. virtual void Accept(DexVisitor& visitor) = 0; @@ -44,7 +45,7 @@ class Dex { }; /// Base class for other Vector related Dex. 
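/// (It holds the FieldDescriptor that the validity/value reader dexes below share;
/// each subclass only overrides Accept to dispatch to its visitor hook.)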
-class VectorReadBaseDex : public Dex { +class GANDIVA_EXPORT VectorReadBaseDex : public Dex { public: explicit VectorReadBaseDex(FieldDescriptorPtr field_desc) : field_desc_(field_desc) {} @@ -59,7 +60,7 @@ class VectorReadBaseDex : public Dex { }; /// validity component of a ValueVector -class VectorReadValidityDex : public VectorReadBaseDex { +class GANDIVA_EXPORT VectorReadValidityDex : public VectorReadBaseDex { public: explicit VectorReadValidityDex(FieldDescriptorPtr field_desc) : VectorReadBaseDex(field_desc) {} @@ -70,7 +71,7 @@ class VectorReadValidityDex : public VectorReadBaseDex { }; /// value component of a fixed-len ValueVector -class VectorReadFixedLenValueDex : public VectorReadBaseDex { +class GANDIVA_EXPORT VectorReadFixedLenValueDex : public VectorReadBaseDex { public: explicit VectorReadFixedLenValueDex(FieldDescriptorPtr field_desc) : VectorReadBaseDex(field_desc) {} @@ -81,7 +82,7 @@ class VectorReadFixedLenValueDex : public VectorReadBaseDex { }; /// value component of a variable-len ValueVector -class VectorReadVarLenValueDex : public VectorReadBaseDex { +class GANDIVA_EXPORT VectorReadVarLenValueDex : public VectorReadBaseDex { public: explicit VectorReadVarLenValueDex(FieldDescriptorPtr field_desc) : VectorReadBaseDex(field_desc) {} @@ -94,7 +95,7 @@ class VectorReadVarLenValueDex : public VectorReadBaseDex { }; /// validity based on a local bitmap. -class LocalBitMapValidityDex : public Dex { +class GANDIVA_EXPORT LocalBitMapValidityDex : public Dex { public: explicit LocalBitMapValidityDex(int local_bitmap_idx) : local_bitmap_idx_(local_bitmap_idx) {} @@ -108,7 +109,7 @@ class LocalBitMapValidityDex : public Dex { }; /// base function expression -class FuncDex : public Dex { +class GANDIVA_EXPORT FuncDex : public Dex { public: FuncDex(FuncDescriptorPtr func_descriptor, const NativeFunction* native_function, FunctionHolderPtr function_holder, const ValueValidityPairVector& args) @@ -134,7 +135,7 @@ class FuncDex : public Dex { /// A function expression that only deals with non-null inputs, and generates non-null /// outputs. -class NonNullableFuncDex : public FuncDex { +class GANDIVA_EXPORT NonNullableFuncDex : public FuncDex { public: NonNullableFuncDex(FuncDescriptorPtr func_descriptor, const NativeFunction* native_function, @@ -147,7 +148,7 @@ class NonNullableFuncDex : public FuncDex { /// A function expression that deals with nullable inputs, but generates non-null /// outputs. -class NullableNeverFuncDex : public FuncDex { +class GANDIVA_EXPORT NullableNeverFuncDex : public FuncDex { public: NullableNeverFuncDex(FuncDescriptorPtr func_descriptor, const NativeFunction* native_function, @@ -160,7 +161,7 @@ class NullableNeverFuncDex : public FuncDex { /// A function expression that deals with nullable inputs, and /// nullable outputs. -class NullableInternalFuncDex : public FuncDex { +class GANDIVA_EXPORT NullableInternalFuncDex : public FuncDex { public: NullableInternalFuncDex(FuncDescriptorPtr func_descriptor, const NativeFunction* native_function, @@ -179,17 +180,17 @@ class NullableInternalFuncDex : public FuncDex { }; /// special validity type that always returns true. -class TrueDex : public Dex { +class GANDIVA_EXPORT TrueDex : public Dex { void Accept(DexVisitor& visitor) override { visitor.Visit(*this); } }; /// special validity type that always returns false. -class FalseDex : public Dex { +class GANDIVA_EXPORT FalseDex : public Dex { void Accept(DexVisitor& visitor) override { visitor.Visit(*this); } }; /// decomposed expression for a literal. 
-class LiteralDex : public Dex { +class GANDIVA_EXPORT LiteralDex : public Dex { public: LiteralDex(DataTypePtr type, const LiteralHolder& holder) : type_(type), holder_(holder) {} @@ -206,7 +207,7 @@ class LiteralDex : public Dex { }; /// decomposed if-else expression. -class IfDex : public Dex { +class GANDIVA_EXPORT IfDex : public Dex { public: IfDex(ValueValidityPairPtr condition_vv, ValueValidityPairPtr then_vv, ValueValidityPairPtr else_vv, DataTypePtr result_type, int local_bitmap_idx, @@ -242,7 +243,7 @@ class IfDex : public Dex { }; // decomposed boolean expression. -class BooleanDex : public Dex { +class GANDIVA_EXPORT BooleanDex : public Dex { public: BooleanDex(const ValueValidityPairVector& args, int local_bitmap_idx) : args_(args), local_bitmap_idx_(local_bitmap_idx) {} @@ -258,7 +259,7 @@ class BooleanDex : public Dex { }; /// Boolean-AND expression -class BooleanAndDex : public BooleanDex { +class GANDIVA_EXPORT BooleanAndDex : public BooleanDex { public: BooleanAndDex(const ValueValidityPairVector& args, int local_bitmap_idx) : BooleanDex(args, local_bitmap_idx) {} @@ -267,7 +268,7 @@ class BooleanAndDex : public BooleanDex { }; /// Boolean-OR expression -class BooleanOrDex : public BooleanDex { +class GANDIVA_EXPORT BooleanOrDex : public BooleanDex { public: BooleanOrDex(const ValueValidityPairVector& args, int local_bitmap_idx) : BooleanDex(args, local_bitmap_idx) {} diff --git a/cpp/src/gandiva/dex_visitor.h b/cpp/src/gandiva/dex_visitor.h index 456fe430511dc..c34629a53e1a8 100644 --- a/cpp/src/gandiva/dex_visitor.h +++ b/cpp/src/gandiva/dex_visitor.h @@ -21,6 +21,7 @@ #include #include "gandiva/logging.h" +#include "gandiva/visibility.h" namespace gandiva { @@ -41,7 +42,7 @@ template class InExprDexBase; /// \brief Visitor for decomposed expression. -class DexVisitor { +class GANDIVA_EXPORT DexVisitor { public: virtual ~DexVisitor() = default; @@ -67,7 +68,7 @@ class DexVisitor { #define VISIT_DCHECK(DEX_CLASS) \ void Visit(const DEX_CLASS& dex) override { DCHECK(0); } -class DexDefaultVisitor : public DexVisitor { +class GANDIVA_EXPORT DexDefaultVisitor : public DexVisitor { VISIT_DCHECK(VectorReadValidityDex) VISIT_DCHECK(VectorReadFixedLenValueDex) VISIT_DCHECK(VectorReadVarLenValueDex) diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index 59884c5b4ad44..d073a3e749f8a 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -23,6 +23,15 @@ #include #include +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4141) +#pragma warning(disable : 4146) +#pragma warning(disable : 4244) +#pragma warning(disable : 4267) +#pragma warning(disable : 4624) +#endif + #include #include #include @@ -39,6 +48,12 @@ #include #include #include + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + +#include "gandiva/decimal_ir.h" #include "gandiva/exported_funcs_registry.h" namespace gandiva { @@ -94,6 +109,10 @@ Status Engine::Make(std::shared_ptr config, auto status = engine_obj->LoadPreCompiledIRFiles(config->byte_code_file_path()); ARROW_RETURN_NOT_OK(status); + // Add decimal functions + status = DecimalIR::AddFunctions(engine_obj.get()); + ARROW_RETURN_NOT_OK(status); + *engine = std::move(engine_obj); return Status::OK(); } @@ -103,12 +122,11 @@ Status Engine::LoadPreCompiledIRFiles(const std::string& byte_code_file_path) { /// Read from file into memory buffer. 
llvm::ErrorOr> buffer_or_error = llvm::MemoryBuffer::getFile(byte_code_file_path); - if (!buffer_or_error) { - std::stringstream ss; - ss << "Could not load module from IR " << byte_code_file_path << ": " - << buffer_or_error.getError().message(); - return Status::CodeGenError(ss.str()); - } + ARROW_RETURN_IF( + !buffer_or_error, + Status::CodeGenError("Could not load module from IR ", byte_code_file_path, ": ", + buffer_or_error.getError().message())); + std::unique_ptr buffer = move(buffer_or_error.get()); /// Parse the IR module. @@ -123,15 +141,11 @@ Status Engine::LoadPreCompiledIRFiles(const std::string& byte_code_file_path) { } std::unique_ptr ir_module = move(module_or_error.get()); - /// Verify the IR module - if (llvm::verifyModule(*ir_module, &llvm::errs())) { - return Status::CodeGenError("verify of IR Module failed"); - } + ARROW_RETURN_IF(llvm::verifyModule(*ir_module, &llvm::errs()), + Status::CodeGenError("verify of IR Module failed")); + ARROW_RETURN_IF(llvm::Linker::linkModules(*module_, move(ir_module)), + Status::CodeGenError("failed to link IR Modules")); - // Link this to the primary module. - if (llvm::Linker::linkModules(*module_, move(ir_module))) { - return Status::CodeGenError("failed to link IR Modules"); - } return Status::OK(); } @@ -188,7 +202,7 @@ Status Engine::FinalizeModule(bool optimise_ir, bool dump_ir) { // run the optimiser llvm::PassManagerBuilder pass_builder; - pass_builder.OptLevel = 2; + pass_builder.OptLevel = 3; pass_builder.populateModulePassManager(*pass_manager); pass_manager->run(*module_); @@ -197,13 +211,13 @@ Status Engine::FinalizeModule(bool optimise_ir, bool dump_ir) { } } - if (llvm::verifyModule(*module_, &llvm::errs())) { - return Status::CodeGenError("verify of module failed after optimisation passes"); - } + ARROW_RETURN_IF(llvm::verifyModule(*module_, &llvm::errs()), + Status::CodeGenError("Module verification failed after optimizer")); // do the compilation execution_engine_->finalizeObject(); module_finalized_ = true; + return Status::OK(); } @@ -227,7 +241,7 @@ void Engine::DumpIR(std::string prefix) { std::string str; llvm::raw_string_ostream stream(str); - module_->print(stream, NULL); + module_->print(stream, nullptr); std::cout << "====" << prefix << "===" << str << "\n"; } diff --git a/cpp/src/gandiva/engine.h b/cpp/src/gandiva/engine.h index f377ebc38d3ef..7a976d5f9265c 100644 --- a/cpp/src/gandiva/engine.h +++ b/cpp/src/gandiva/engine.h @@ -23,31 +23,31 @@ #include #include -#include -#include -#include -#include - #include "arrow/status.h" #include "arrow/util/macros.h" #include "gandiva/configuration.h" +#include "gandiva/llvm_includes.h" #include "gandiva/llvm_types.h" #include "gandiva/logging.h" +#include "gandiva/visibility.h" namespace gandiva { +class FunctionIRBuilder; + /// \brief LLVM Execution engine wrapper. -class Engine { +class GANDIVA_EXPORT Engine { public: llvm::LLVMContext* context() { return context_.get(); } llvm::IRBuilder<>* ir_builder() { return ir_builder_.get(); } LLVMTypes* types() { return types_.get(); } llvm::Module* module() { return module_; } - /// factory method to create and initialize the engine object. + /// Factory method to create and initialize the engine object. /// - /// \param[out] engine the created engine. 
+ /// \param[in] config the engine configuration
+ /// \param[out] engine the created engine
 static Status Make(std::shared_ptr<Configuration> config,
 std::unique_ptr<Engine>* engine);
diff --git a/cpp/src/gandiva/engine_llvm_test.cc b/cpp/src/gandiva/engine_llvm_test.cc
index fe4f82e19320c..627c385f97363 100644
--- a/cpp/src/gandiva/engine_llvm_test.cc
+++ b/cpp/src/gandiva/engine_llvm_test.cc
@@ -19,6 +19,7 @@
 #include
 #include "gandiva/llvm_types.h"
+#include "gandiva/tests/test_util.h"
 namespace gandiva {
@@ -100,7 +101,7 @@ llvm::Function* TestEngine::BuildVecAdd(Engine* engine, LLVMTypes* types) {
 TEST_F(TestEngine, TestAddUnoptimised) {
 std::unique_ptr<Engine> engine;
- Status status = Engine::Make(ConfigurationBuilder::DefaultConfiguration(), &engine);
+ auto status = Engine::Make(TestConfiguration(), &engine);
 EXPECT_TRUE(status.ok()) << status.message();
 LLVMTypes types(*engine->context());
 llvm::Function* ir_func = BuildVecAdd(engine.get(), &types);
@@ -115,7 +116,7 @@
 TEST_F(TestEngine, TestAddOptimised) {
 std::unique_ptr<Engine> engine;
- Status status = Engine::Make(ConfigurationBuilder::DefaultConfiguration(), &engine);
+ auto status = Engine::Make(TestConfiguration(), &engine);
 EXPECT_TRUE(status.ok()) << status.message();
 LLVMTypes types(*engine->context());
 llvm::Function* ir_func = BuildVecAdd(engine.get(), &types);
diff --git a/cpp/src/gandiva/eval_batch.h b/cpp/src/gandiva/eval_batch.h
index 608f4200ce415..093968f232afb 100644
--- a/cpp/src/gandiva/eval_batch.h
+++ b/cpp/src/gandiva/eval_batch.h
@@ -85,7 +85,7 @@ class EvalBatch {
 /// An array of 'num_buffers_', each containing a buffer. The buffer
 /// sizes depend on the data type, but all of them have the same
 /// number of slots (equal to num_records_).
- std::unique_ptr buffers_array_;
+ std::unique_ptr buffers_array_;
 std::unique_ptr local_bitmaps_holder_;
diff --git a/cpp/src/gandiva/exported_funcs.h b/cpp/src/gandiva/exported_funcs.h
index 0ca28c2b5b188..4e028be6ec1d3 100644
--- a/cpp/src/gandiva/exported_funcs.h
+++ b/cpp/src/gandiva/exported_funcs.h
@@ -45,6 +45,12 @@ class ExportedContextFunctions : public ExportedFuncsBase {
 };
 REGISTER_EXPORTED_FUNCS(ExportedContextFunctions);
+// Class for exporting Time functions
+class ExportedTimeFunctions : public ExportedFuncsBase {
+ void AddMappings(Engine* engine) const override;
+};
+REGISTER_EXPORTED_FUNCS(ExportedTimeFunctions);
+
 } // namespace gandiva
 #endif // GANDIVA_EXPORTED_FUNCS_H
diff --git a/cpp/src/gandiva/exported_funcs_registry.h b/cpp/src/gandiva/exported_funcs_registry.h
index 511ec9c212468..35ad5c0fae516 100644
--- a/cpp/src/gandiva/exported_funcs_registry.h
+++ b/cpp/src/gandiva/exported_funcs_registry.h
@@ -18,6 +18,7 @@
 #ifndef GANDIVA_EXPORTED_FUNCS_REGISTRY_H
 #define GANDIVA_EXPORTED_FUNCS_REGISTRY_H
+#include
 #include
 #include
@@ -30,12 +31,12 @@ class ExportedFuncsBase;
 /// LLVM/IR code.
 class ExportedFuncsRegistry {
 public:
- using list_type = std::vector<ExportedFuncsBase*>;
+ using list_type = std::vector<std::shared_ptr<ExportedFuncsBase>>;
 // Add functions from all the registered classes to the engine.
static void AddMappings(Engine* engine);
- static bool Register(ExportedFuncsBase* entry) {
+ static bool Register(std::shared_ptr<ExportedFuncsBase> entry) {
 registered().push_back(entry);
 return true;
 }
@@ -48,7 +49,8 @@
 };
 #define REGISTER_EXPORTED_FUNCS(classname) \
- static bool _registered_##classname = ExportedFuncsRegistry::Register(new classname)
+ static bool _registered_##classname = \
+ ExportedFuncsRegistry::Register(std::make_shared<classname>())
 } // namespace gandiva
diff --git a/cpp/src/gandiva/expr_decomposer.cc b/cpp/src/gandiva/expr_decomposer.cc
index bed84ededb5e7..91014f1b82783 100644
--- a/cpp/src/gandiva/expr_decomposer.cc
+++ b/cpp/src/gandiva/expr_decomposer.cc
@@ -232,7 +232,7 @@ int ExprDecomposer::PushThenEntry(const IfNode& node) {
 // push new entry to the stack.
 std::unique_ptr<IfStackEntry> entry(new IfStackEntry(
 node, kStackEntryThen, false /*is_terminal_else*/, local_bitmap_idx));
- if_entries_stack_.push(std::move(entry));
+ if_entries_stack_.emplace(std::move(entry));
 return local_bitmap_idx;
 }
@@ -250,7 +250,7 @@ void ExprDecomposer::PopThenEntry(const IfNode& node) {
 void ExprDecomposer::PushElseEntry(const IfNode& node, int local_bitmap_idx) {
 std::unique_ptr<IfStackEntry> entry(new IfStackEntry(
 node, kStackEntryElse, true /*is_terminal_else*/, local_bitmap_idx));
- if_entries_stack_.push(std::move(entry));
+ if_entries_stack_.emplace(std::move(entry));
 }
 bool ExprDecomposer::PopElseEntry(const IfNode& node) {
@@ -268,7 +268,7 @@ bool ExprDecomposer::PopElseEntry(const IfNode& node) {
 void ExprDecomposer::PushConditionEntry(const IfNode& node) {
 std::unique_ptr<IfStackEntry> entry(new IfStackEntry(node, kStackEntryCondition));
- if_entries_stack_.push(std::move(entry));
+ if_entries_stack_.emplace(std::move(entry));
 }
 void ExprDecomposer::PopConditionEntry(const IfNode& node) {
diff --git a/cpp/src/gandiva/expr_decomposer.h b/cpp/src/gandiva/expr_decomposer.h
index bc21ed07cf57c..ab92ca3d24940 100644
--- a/cpp/src/gandiva/expr_decomposer.h
+++ b/cpp/src/gandiva/expr_decomposer.h
@@ -27,6 +27,7 @@
 #include "gandiva/expression.h"
 #include "gandiva/node.h"
 #include "gandiva/node_visitor.h"
+#include "gandiva/visibility.h"
 namespace gandiva {
@@ -35,7 +36,7 @@ class Annotator;
 /// \brief Decomposes an expression tree to separate out the validity and
 /// value expressions.
-class ExprDecomposer : public NodeVisitor {
+class GANDIVA_EXPORT ExprDecomposer : public NodeVisitor {
 public:
 explicit ExprDecomposer(const FunctionRegistry& registry, Annotator& annotator)
 : registry_(registry), annotator_(annotator) {}
@@ -49,6 +50,8 @@
 }
 private:
+ ARROW_DISALLOW_COPY_AND_ASSIGN(ExprDecomposer);
+
 FRIEND_TEST(TestExprDecomposer, TestStackSimple);
 FRIEND_TEST(TestExprDecomposer, TestNested);
 FRIEND_TEST(TestExprDecomposer, TestInternalIf);
@@ -83,6 +86,9 @@
 StackEntryType entry_type_;
 bool is_terminal_else_;
 int local_bitmap_idx_;
+
+ private:
+ ARROW_DISALLOW_COPY_AND_ASSIGN(IfStackEntry);
 };
 // pop 'condition entry' from the stack.
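// (The Push*/Pop* calls above appear to be strictly paired while decomposing an
//  if-node: a condition/then/else entry is pushed before visiting that subtree
//  and popped right after, keeping the stack balanced for nested expressions.)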
diff --git a/cpp/src/gandiva/expr_validator.cc b/cpp/src/gandiva/expr_validator.cc
index 3f5d63745f942..43de9d7a053f8 100644
--- a/cpp/src/gandiva/expr_validator.cc
+++ b/cpp/src/gandiva/expr_validator.cc
@@ -24,133 +24,114 @@ namespace gandiva {
 Status ExprValidator::Validate(const ExpressionPtr& expr) {
- if (expr == nullptr) {
- return Status::ExpressionValidationError("Expression cannot be null.");
- }
+ ARROW_RETURN_IF(expr == nullptr,
+ Status::ExpressionValidationError("Expression cannot be null"));
+
 Node& root = *expr->root();
- Status status = root.Accept(*this);
- if (!status.ok()) {
- return status;
- }
- // validate return type matches
- // no need to check if type is supported
- // since root type has been validated.
- if (!root.return_type()->Equals(*expr->result()->type())) {
- std::stringstream ss;
- ss << "Return type of root node " << root.return_type()->name()
- << " does not match that of expression " << *expr->result()->type();
- return Status::ExpressionValidationError(ss.str());
- }
+ ARROW_RETURN_NOT_OK(root.Accept(*this));
+
+ // Ensure the root's return type matches the expression's return type. Type
+ // support validation is not required because the root type is already supported.
+ ARROW_RETURN_IF(!root.return_type()->Equals(*expr->result()->type()),
+ Status::ExpressionValidationError("Return type of root node ",
+ root.return_type()->name(),
+ " does not match that of expression ",
+ expr->result()->type()->name()));
+
 return Status::OK();
}
 Status ExprValidator::Visit(const FieldNode& node) {
 auto llvm_type = types_->IRType(node.return_type()->id());
- if (llvm_type == nullptr) {
- std::stringstream ss;
- ss << "Field " << node.field()->name() << " has unsupported data type "
- << node.return_type()->name();
- return Status::ExpressionValidationError(ss.str());
- }
+ ARROW_RETURN_IF(llvm_type == nullptr,
+ Status::ExpressionValidationError("Field ", node.field()->name(),
+ " has unsupported data type ",
+ node.return_type()->name()));
+
+ // Ensure that the field is found in the schema
 auto field_in_schema_entry = field_map_.find(node.field()->name());
+ ARROW_RETURN_IF(field_in_schema_entry == field_map_.end(),
+ Status::ExpressionValidationError("Field ", node.field()->name(),
+ " not in schema."));
-
- // validate that field is in schema.
- if (field_in_schema_entry == field_map_.end()) {
- std::stringstream ss;
- ss << "Field " << node.field()->name() << " not in schema.";
- return Status::ExpressionValidationError(ss.str());
- }
-
+ // Ensure that the found field matches.
 FieldPtr field_in_schema = field_in_schema_entry->second;
- // validate that field matches the definition in schema.
- if (!field_in_schema->Equals(node.field())) {
- std::stringstream ss;
- ss << "Field definition in schema " << field_in_schema->ToString()
- << " different from field in expression " << node.field()->ToString();
- return Status::ExpressionValidationError(ss.str());
- }
+ ARROW_RETURN_IF(!field_in_schema->Equals(node.field()),
+ Status::ExpressionValidationError(
+ "Field definition in schema ", field_in_schema->ToString(),
+ " different from field in expression ", node.field()->ToString()));
+
 return Status::OK();
}
 Status ExprValidator::Visit(const FunctionNode& node) {
 auto desc = node.descriptor();
 FunctionSignature signature(desc->name(), desc->params(), desc->return_type());
+
 const NativeFunction* native_function = registry_.LookupSignature(signature);
- if (native_function == nullptr) {
- std::stringstream ss;
- ss << "Function " << signature.ToString() << " not supported yet. 
"; - return Status::ExpressionValidationError(ss.str()); - } + ARROW_RETURN_IF(native_function == nullptr, + Status::ExpressionValidationError("Function ", signature.ToString(), + " not supported yet. ")); for (auto& child : node.children()) { - Status status = child->Accept(*this); - ARROW_RETURN_NOT_OK(status); + ARROW_RETURN_NOT_OK(child->Accept(*this)); } + return Status::OK(); } Status ExprValidator::Visit(const IfNode& node) { - Status status = node.condition()->Accept(*this); - ARROW_RETURN_NOT_OK(status); - status = node.then_node()->Accept(*this); - ARROW_RETURN_NOT_OK(status); - status = node.else_node()->Accept(*this); - ARROW_RETURN_NOT_OK(status); + ARROW_RETURN_NOT_OK(node.condition()->Accept(*this)); + ARROW_RETURN_NOT_OK(node.then_node()->Accept(*this)); + ARROW_RETURN_NOT_OK(node.else_node()->Accept(*this)); auto if_node_ret_type = node.return_type(); auto then_node_ret_type = node.then_node()->return_type(); auto else_node_ret_type = node.else_node()->return_type(); - if (!if_node_ret_type->Equals(*then_node_ret_type)) { - std::stringstream ss; - ss << "Return type of if " << *if_node_ret_type << " and then " << *then_node_ret_type - << " not matching."; - return Status::ExpressionValidationError(ss.str()); - } + // Then-branch return type must match. + ARROW_RETURN_IF(!if_node_ret_type->Equals(*then_node_ret_type), + Status::ExpressionValidationError( + "Return type of if ", if_node_ret_type->ToString(), " and then ", + then_node_ret_type->ToString(), " not matching.")); - if (!if_node_ret_type->Equals(*else_node_ret_type)) { - std::stringstream ss; - ss << "Return type of if " << *if_node_ret_type << " and else " << *else_node_ret_type - << " not matching."; - return Status::ExpressionValidationError(ss.str()); - } + // Else-branch return type must match. 
+ ARROW_RETURN_IF(!if_node_ret_type->Equals(*else_node_ret_type), + Status::ExpressionValidationError( + "Return type of if ", if_node_ret_type->ToString(), " and else ", + else_node_ret_type->ToString(), " not matching.")); return Status::OK(); } Status ExprValidator::Visit(const LiteralNode& node) { auto llvm_type = types_->IRType(node.return_type()->id()); - if (llvm_type == nullptr) { - std::stringstream ss; - ss << "Value " << node.holder() << " has unsupported data type " - << node.return_type()->name(); - return Status::ExpressionValidationError(ss.str()); - } + ARROW_RETURN_IF(llvm_type == nullptr, + Status::ExpressionValidationError("Value ", node.holder(), + " has unsupported data type ", + node.return_type()->name())); + return Status::OK(); } Status ExprValidator::Visit(const BooleanNode& node) { - Status status; - - if (node.children().size() < 2) { - std::stringstream ss; - ss << "Boolean expression has " << node.children().size() - << " children, expected atleast two"; - return Status::ExpressionValidationError(ss.str()); - } + ARROW_RETURN_IF( + node.children().size() < 2, + Status::ExpressionValidationError("Boolean expression has ", node.children().size(), + " children, expected at least two")); for (auto& child : node.children()) { - if (!child->return_type()->Equals(arrow::boolean())) { - std::stringstream ss; - ss << "Boolean expression has a child with return type " - << child->return_type()->name() << ", expected return type boolean"; - return Status::ExpressionValidationError(ss.str()); - } - - status = child->Accept(*this); - ARROW_RETURN_NOT_OK(status); + const auto bool_type = arrow::boolean(); + const auto ret_type = child->return_type(); + + ARROW_RETURN_IF(!ret_type->Equals(bool_type), + Status::ExpressionValidationError( + "Boolean expression has a child with return type ", + ret_type->ToString(), ", expected return type boolean")); + + ARROW_RETURN_NOT_OK(child->Accept(*this)); } + return Status::OK(); } @@ -178,18 +159,13 @@ Status ExprValidator::Visit(const InExpressionNode& node) { Status ExprValidator::ValidateInExpression(size_t number_of_values, DataTypePtr in_expr_return_type, DataTypePtr type_of_values) { - if (static_cast(number_of_values) == 0) { - std::stringstream ss; - ss << "IN Expression needs a non-empty constant list to match."; - return Status::ExpressionValidationError(ss.str()); - } - - if (!in_expr_return_type->Equals(type_of_values)) { - std::stringstream ss; - ss << "Evaluation expression for IN clause returns " << in_expr_return_type - << " values are of type" << type_of_values; - return Status::ExpressionValidationError(ss.str()); - } + ARROW_RETURN_IF(number_of_values == 0, + Status::ExpressionValidationError( + "IN Expression needs a non-empty constant list to match.")); + ARROW_RETURN_IF(!in_expr_return_type->Equals(type_of_values), + Status::ExpressionValidationError( + "Evaluation expression for IN clause returns ", in_expr_return_type, + ", values are of type ", type_of_values)); return Status::OK(); } diff --git a/cpp/src/gandiva/expression.h b/cpp/src/gandiva/expression.h index e3ae18f4d4c28..2141e871393e5 100644 --- a/cpp/src/gandiva/expression.h +++ b/cpp/src/gandiva/expression.h @@ -22,11 +22,12 @@ #include "gandiva/arrow.h" #include "gandiva/gandiva_aliases.h" +#include "gandiva/visibility.h" namespace gandiva { /// \brief An expression tree with a root node, and a result field. 
-class Expression { +class GANDIVA_EXPORT Expression { public: Expression(const NodePtr root, const FieldPtr result) : root_(root), result_(result) {} diff --git a/cpp/src/gandiva/expression_registry.cc b/cpp/src/gandiva/expression_registry.cc index fb5a45e779926..1a087c96f33bd 100644 --- a/cpp/src/gandiva/expression_registry.cc +++ b/cpp/src/gandiva/expression_registry.cc @@ -136,10 +136,12 @@ void ExpressionRegistry::AddArrowTypesToVector(arrow::Type::type& type, case arrow::Type::type::NA: vector.push_back(arrow::null()); break; + case arrow::Type::type::DECIMAL: + vector.push_back(arrow::decimal(0, 0)); + break; case arrow::Type::type::FIXED_SIZE_BINARY: case arrow::Type::type::MAP: case arrow::Type::type::INTERVAL: - case arrow::Type::type::DECIMAL: case arrow::Type::type::LIST: case arrow::Type::type::STRUCT: case arrow::Type::type::UNION: diff --git a/cpp/src/gandiva/expression_registry.h b/cpp/src/gandiva/expression_registry.h index a03deab91cdc1..4524a077a629c 100644 --- a/cpp/src/gandiva/expression_registry.h +++ b/cpp/src/gandiva/expression_registry.h @@ -24,6 +24,7 @@ #include "gandiva/arrow.h" #include "gandiva/function_signature.h" #include "gandiva/gandiva_aliases.h" +#include "gandiva/visibility.h" namespace gandiva { @@ -33,13 +34,13 @@ class FunctionRegistry; /// /// Has helper methods for clients to programatically discover /// data types and functions supported by Gandiva. -class ExpressionRegistry { +class GANDIVA_EXPORT ExpressionRegistry { public: using iterator = const NativeFunction*; ExpressionRegistry(); ~ExpressionRegistry(); static DataTypeVector supported_types() { return supported_types_; } - class FunctionSignatureIterator { + class GANDIVA_EXPORT FunctionSignatureIterator { public: explicit FunctionSignatureIterator(iterator it) : it_(it) {} diff --git a/cpp/src/gandiva/filter.cc b/cpp/src/gandiva/filter.cc index 7a24d9554ef3f..3bba1909af866 100644 --- a/cpp/src/gandiva/filter.cc +++ b/cpp/src/gandiva/filter.cc @@ -37,35 +37,33 @@ Filter::Filter(std::unique_ptr llvm_generator, SchemaPtr schema, schema_(schema), configuration_(configuration) {} +Filter::~Filter() {} + Status Filter::Make(SchemaPtr schema, ConditionPtr condition, std::shared_ptr configuration, std::shared_ptr* filter) { - ARROW_RETURN_FAILURE_IF_FALSE(schema != nullptr, - Status::Invalid("schema cannot be null")); - ARROW_RETURN_FAILURE_IF_FALSE(condition != nullptr, - Status::Invalid("condition cannot be null")); - ARROW_RETURN_FAILURE_IF_FALSE(configuration != nullptr, - Status::Invalid("configuration cannot be null")); + ARROW_RETURN_IF(schema == nullptr, Status::Invalid("Schema cannot be null")); + ARROW_RETURN_IF(condition == nullptr, Status::Invalid("Condition cannot be null")); + ARROW_RETURN_IF(configuration == nullptr, + Status::Invalid("Configuration cannot be null")); + static Cache> cache; FilterCacheKey cache_key(schema, configuration, *(condition.get())); - std::shared_ptr cachedFilter = cache.GetModule(cache_key); + auto cachedFilter = cache.GetModule(cache_key); if (cachedFilter != nullptr) { *filter = cachedFilter; return Status::OK(); } + // Build LLVM generator, and generate code for the specified expression std::unique_ptr llvm_gen; - Status status = LLVMGenerator::Make(configuration, &llvm_gen); - ARROW_RETURN_NOT_OK(status); + ARROW_RETURN_NOT_OK(LLVMGenerator::Make(configuration, &llvm_gen)); // Run the validation on the expression. // Return if the expression is invalid since we will not be able to process further. 
ExprValidator expr_validator(llvm_gen->types(), schema); - status = expr_validator.Validate(condition); - ARROW_RETURN_NOT_OK(status); - - status = llvm_gen->Build({condition}); - ARROW_RETURN_NOT_OK(status); + ARROW_RETURN_NOT_OK(expr_validator.Validate(condition)); + ARROW_RETURN_NOT_OK(llvm_gen->Build({condition})); // Instantiate the filter with the completely built llvm generator *filter = std::make_shared(std::move(llvm_gen), schema, configuration); @@ -76,42 +74,33 @@ Status Filter::Make(SchemaPtr schema, ConditionPtr condition, Status Filter::Evaluate(const arrow::RecordBatch& batch, std::shared_ptr out_selection) { - if (!batch.schema()->Equals(*schema_)) { - return Status::Invalid("Schema in RecordBatch must match the schema in Make()"); - } - if (batch.num_rows() == 0) { - return Status::Invalid("RecordBatch must be non-empty."); - } - if (out_selection == nullptr) { - return Status::Invalid("out_selection must be non-null."); - } - if (out_selection->GetMaxSlots() < batch.num_rows()) { - std::stringstream ss; - ss << "out_selection has " << out_selection->GetMaxSlots() - << " slots, which is less than the batch size " << batch.num_rows(); - return Status::Invalid(ss.str()); - } + const auto num_rows = batch.num_rows(); + ARROW_RETURN_IF(!batch.schema()->Equals(*schema_), + Status::Invalid("RecordBatch schema must match the expected filter schema")); + ARROW_RETURN_IF(num_rows == 0, Status::Invalid("RecordBatch must be non-empty.")); + ARROW_RETURN_IF(out_selection == nullptr, + Status::Invalid("out_selection must be non-null.")); + ARROW_RETURN_IF(out_selection->GetMaxSlots() < num_rows, + Status::Invalid("Output selection vector capacity too small")); // Allocate three local_bitmaps (one for output, one for validity, one to compute the // intersection). - LocalBitMapsHolder bitmaps(batch.num_rows(), 3 /*local_bitmaps*/); + LocalBitMapsHolder bitmaps(num_rows, 3 /*local_bitmaps*/); int64_t bitmap_size = bitmaps.GetLocalBitMapSize(); auto validity = std::make_shared(bitmaps.GetLocalBitMap(0), bitmap_size); auto value = std::make_shared(bitmaps.GetLocalBitMap(1), bitmap_size); - auto array_data = - arrow::ArrayData::Make(arrow::boolean(), batch.num_rows(), {validity, value}); + auto array_data = arrow::ArrayData::Make(arrow::boolean(), num_rows, {validity, value}); // Execute the expression(s). - auto status = llvm_generator_->Execute(batch, {array_data}); - ARROW_RETURN_NOT_OK(status); + ARROW_RETURN_NOT_OK(llvm_generator_->Execute(batch, {array_data})); // Compute the intersection of the value and validity. auto result = bitmaps.GetLocalBitMap(2); BitMapAccumulator::IntersectBitMaps( - result, {bitmaps.GetLocalBitMap(0), bitmaps.GetLocalBitMap((1))}, batch.num_rows()); + result, {bitmaps.GetLocalBitMap(0), bitmaps.GetLocalBitMap(1)}, num_rows); - return out_selection->PopulateFromBitMap(result, bitmap_size, batch.num_rows() - 1); + return out_selection->PopulateFromBitMap(result, bitmap_size, num_rows - 1); } } // namespace gandiva diff --git a/cpp/src/gandiva/filter.h b/cpp/src/gandiva/filter.h index 6ff7010ac07f6..4fbda806e0af9 100644 --- a/cpp/src/gandiva/filter.h +++ b/cpp/src/gandiva/filter.h @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-#ifndef GANDIVA_EXPR_FILTER_H -#define GANDIVA_EXPR_FILTER_H +#pragma once #include #include @@ -29,6 +28,7 @@ #include "gandiva/condition.h" #include "gandiva/configuration.h" #include "gandiva/selection_vector.h" +#include "gandiva/visibility.h" namespace gandiva { @@ -38,12 +38,14 @@ class LLVMGenerator; /// /// A filter is built for a specific schema and condition. Once the filter is built, it /// can be used to evaluate many row batches. -class Filter { +class GANDIVA_EXPORT Filter { public: Filter(std::unique_ptr llvm_generator, SchemaPtr schema, std::shared_ptr config); - ~Filter() = default; + // Inline dtor will attempt to resolve the destructor for + // LLVMGenerator on MSVC, so we compile the dtor in the object code + ~Filter(); /// Build a filter for the given schema and condition, with the default configuration. /// @@ -81,5 +83,3 @@ class Filter { }; } // namespace gandiva - -#endif // GANDIVA_EXPR_FILTER_H diff --git a/cpp/src/gandiva/func_descriptor.h b/cpp/src/gandiva/func_descriptor.h index 9b18a9b694319..08f719995fe6b 100644 --- a/cpp/src/gandiva/func_descriptor.h +++ b/cpp/src/gandiva/func_descriptor.h @@ -22,11 +22,12 @@ #include #include "gandiva/arrow.h" +#include "gandiva/visibility.h" namespace gandiva { /// Descriptor for a function in the expression. -class FuncDescriptor { +class GANDIVA_EXPORT FuncDescriptor { public: FuncDescriptor(const std::string& name, const DataTypeVector& params, DataTypePtr return_type) diff --git a/cpp/src/gandiva/function_holder.h b/cpp/src/gandiva/function_holder.h index 4d007d1db3fa7..43dbeac07c97f 100644 --- a/cpp/src/gandiva/function_holder.h +++ b/cpp/src/gandiva/function_holder.h @@ -20,10 +20,12 @@ #include +#include "gandiva/visibility.h" + namespace gandiva { /// Holder for a function that can be invoked from LLVM. -class FunctionHolder { +class GANDIVA_EXPORT FunctionHolder { public: virtual ~FunctionHolder() = default; }; diff --git a/cpp/src/gandiva/function_ir_builder.cc b/cpp/src/gandiva/function_ir_builder.cc new file mode 100644 index 0000000000000..194273933cd15 --- /dev/null +++ b/cpp/src/gandiva/function_ir_builder.cc @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "gandiva/function_ir_builder.h" + +namespace gandiva { + +llvm::Value* FunctionIRBuilder::BuildIfElse(llvm::Value* condition, + llvm::Type* return_type, + std::function then_func, + std::function else_func) { + llvm::IRBuilder<>* builder = ir_builder(); + llvm::Function* function = builder->GetInsertBlock()->getParent(); + DCHECK_NE(function, nullptr); + + // Create blocks for the then, else and merge cases. 
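+ // (Sketch of the CFG this emits: the current block branches on `condition` to
+ // then_bb or else_bb; each branch emits its value and jumps to merge_bb, where
+ // a phi node selects whichever branch value arrived.)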
+ llvm::BasicBlock* then_bb = llvm::BasicBlock::Create(*context(), "then", function); + llvm::BasicBlock* else_bb = llvm::BasicBlock::Create(*context(), "else", function); + llvm::BasicBlock* merge_bb = llvm::BasicBlock::Create(*context(), "merge", function); + + builder->CreateCondBr(condition, then_bb, else_bb); + + // Emit the then block. + builder->SetInsertPoint(then_bb); + auto then_value = then_func(); + builder->CreateBr(merge_bb); + + // refresh then_bb for phi (could have changed due to code generation of then_value). + then_bb = builder->GetInsertBlock(); + + // Emit the else block. + builder->SetInsertPoint(else_bb); + auto else_value = else_func(); + builder->CreateBr(merge_bb); + + // refresh else_bb for phi (could have changed due to code generation of else_value). + else_bb = builder->GetInsertBlock(); + + // Emit the merge block. + builder->SetInsertPoint(merge_bb); + llvm::PHINode* result_value = builder->CreatePHI(return_type, 2, "res_value"); + result_value->addIncoming(then_value, then_bb); + result_value->addIncoming(else_value, else_bb); + return result_value; +} + +llvm::Function* FunctionIRBuilder::BuildFunction(const std::string& function_name, + llvm::Type* return_type, + std::vector in_args) { + std::vector arg_types; + for (auto& arg : in_args) { + arg_types.push_back(arg.type); + } + auto prototype = llvm::FunctionType::get(return_type, arg_types, false /*isVarArg*/); + auto function = llvm::Function::Create(prototype, llvm::GlobalValue::ExternalLinkage, + function_name, module()); + + uint32_t i = 0; + for (auto& fn_arg : function->args()) { + DCHECK_LT(i, in_args.size()); + fn_arg.setName(in_args[i].name); + ++i; + } + return function; +} + +} // namespace gandiva diff --git a/cpp/src/gandiva/function_ir_builder.h b/cpp/src/gandiva/function_ir_builder.h new file mode 100644 index 0000000000000..7d6003a62d5bf --- /dev/null +++ b/cpp/src/gandiva/function_ir_builder.h @@ -0,0 +1,64 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef GANDIVA_FUNCTION_IR_BUILDER_H +#define GANDIVA_FUNCTION_IR_BUILDER_H + +#include +#include +#include +#include + +#include "gandiva/engine.h" +#include "gandiva/gandiva_aliases.h" +#include "gandiva/llvm_types.h" + +namespace gandiva { + +/// @brief Base class for building IR functions. +class FunctionIRBuilder { + public: + explicit FunctionIRBuilder(Engine* engine) : engine_(engine) {} + virtual ~FunctionIRBuilder() = default; + + protected: + LLVMTypes* types() { return engine_->types(); } + llvm::Module* module() { return engine_->module(); } + llvm::LLVMContext* context() { return engine_->context(); } + llvm::IRBuilder<>* ir_builder() { return engine_->ir_builder(); } + + /// Build an if-else block. 
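+ /// Emits roughly `condition ? then_func() : else_func()`: each callback
+ /// generates the IR for its branch, and the returned value is the phi node
+ /// that merges the two branch results.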
+ llvm::Value* BuildIfElse(llvm::Value* condition, llvm::Type* return_type, + std::function then_func, + std::function else_func); + + struct NamedArg { + std::string name; + llvm::Type* type; + }; + + /// Build llvm fn. + llvm::Function* BuildFunction(const std::string& function_name, llvm::Type* return_type, + std::vector in_args); + + private: + Engine* engine_; +}; + +} // namespace gandiva + +#endif // GANDIVA_FUNCTION_IR_BUILDER_H diff --git a/cpp/src/gandiva/function_registry.cc b/cpp/src/gandiva/function_registry.cc index 3928fbeb0edb3..43eda4dee77d7 100644 --- a/cpp/src/gandiva/function_registry.cc +++ b/cpp/src/gandiva/function_registry.cc @@ -16,450 +16,62 @@ // under the License. #include "gandiva/function_registry.h" - +#include "gandiva/function_registry_arithmetic.h" +#include "gandiva/function_registry_datetime.h" +#include "gandiva/function_registry_hash.h" +#include "gandiva/function_registry_math_ops.h" +#include "gandiva/function_registry_string.h" +#include "gandiva/function_registry_timestamp_arithmetic.h" + +#include +#include #include namespace gandiva { -using arrow::binary; -using arrow::boolean; -using arrow::date64; -using arrow::float32; -using arrow::float64; -using arrow::int16; -using arrow::int32; -using arrow::int64; -using arrow::int8; -using arrow::uint16; -using arrow::uint32; -using arrow::uint64; -using arrow::uint8; -using arrow::utf8; -using std::vector; - -#define STRINGIFY(a) #a - -// Binary functions that : -// - have the same input type for both params -// - output type is same as the input type -// - NULL handling is of type NULL_IF_NULL -// -// The pre-compiled fn name includes the base name & input type names. eg. add_int32_int32 -#define BINARY_SYMMETRIC_SAFE_NULL_IF_NULL(NAME, TYPE) \ - NativeFunction(#NAME, DataTypeVector{TYPE(), TYPE()}, TYPE(), kResultNullIfNull, \ - STRINGIFY(NAME##_##TYPE##_##TYPE)) - -// Binary functions that : -// - have the same input type for both params -// - NULL handling is of type NULL_IINTERNAL -// - can return error. -// -// The pre-compiled fn name includes the base name & input type names. eg. add_int32_int32 -#define BINARY_UNSAFE_NULL_IF_NULL(NAME, IN_TYPE, OUT_TYPE) \ - NativeFunction(#NAME, DataTypeVector{IN_TYPE(), IN_TYPE()}, OUT_TYPE(), \ - kResultNullIfNull, STRINGIFY(NAME##_##IN_TYPE##_##IN_TYPE), \ - NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors) - -#define BINARY_SYMMETRIC_UNSAFE_NULL_IF_NULL(NAME, TYPE) \ - BINARY_UNSAFE_NULL_IF_NULL(NAME, TYPE, TYPE) - -// Binary functions that : -// - have different input types, or output type -// - NULL handling is of type NULL_IF_NULL -// -// The pre-compiled fn name includes the base name & input type names. eg. mod_int64_int32 -#define BINARY_GENERIC_SAFE_NULL_IF_NULL(NAME, IN_TYPE1, IN_TYPE2, OUT_TYPE) \ - NativeFunction(#NAME, DataTypeVector{IN_TYPE1(), IN_TYPE2()}, OUT_TYPE(), \ - kResultNullIfNull, STRINGIFY(NAME##_##IN_TYPE1##_##IN_TYPE2)) - -// Binary functions that : -// - have the same input type -// - output type is boolean -// - NULL handling is of type NULL_IF_NULL -// -// The pre-compiled fn name includes the base name & input type names. -// eg. equal_int32_int32 -#define BINARY_RELATIONAL_SAFE_NULL_IF_NULL(NAME, TYPE) \ - NativeFunction(#NAME, DataTypeVector{TYPE(), TYPE()}, boolean(), kResultNullIfNull, \ - STRINGIFY(NAME##_##TYPE##_##TYPE)) - -// Unary functions that : -// - NULL handling is of type NULL_IF_NULL -// -// The pre-compiled fn name includes the base name & input type name. eg. 
castFloat_int32 -#define UNARY_SAFE_NULL_IF_NULL(NAME, IN_TYPE, OUT_TYPE) \ - NativeFunction(#NAME, DataTypeVector{IN_TYPE()}, OUT_TYPE(), kResultNullIfNull, \ - STRINGIFY(NAME##_##IN_TYPE)) - -// Unary functions that : -// - NULL handling is of type NULL_NEVER -// -// The pre-compiled fn name includes the base name & input type name. eg. isnull_int32 -#define UNARY_SAFE_NULL_NEVER_BOOL(NAME, TYPE) \ - NativeFunction(#NAME, DataTypeVector{TYPE()}, boolean(), kResultNullNever, \ - STRINGIFY(NAME##_##TYPE)) - -// Unary functions that : -// - NULL handling is of type NULL_INTERNAL -// -// The pre-compiled fn name includes the base name & input type name. eg. castFloat_int32 -#define UNARY_UNSAFE_NULL_IF_NULL(NAME, IN_TYPE, OUT_TYPE) \ - NativeFunction(#NAME, DataTypeVector{IN_TYPE()}, OUT_TYPE(), kResultNullIfNull, \ - STRINGIFY(NAME##_##IN_TYPE), \ - NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors) - -// Binary functions that : -// - NULL handling is of type NULL_NEVER -// -// The pre-compiled fn name includes the base name & input type names, -// eg. is_distinct_from_int32_int32 -#define BINARY_SAFE_NULL_NEVER_BOOL(NAME, TYPE) \ - NativeFunction(#NAME, DataTypeVector{TYPE(), TYPE()}, boolean(), kResultNullNever, \ - STRINGIFY(NAME##_##TYPE##_##TYPE)) - -// Extract functions (used with data/time types) that : -// - NULL handling is of type NULL_IF_NULL -// -// The pre-compiled fn name includes the base name & input type name. eg. extractYear_date -#define EXTRACT_SAFE_NULL_IF_NULL(NAME, TYPE) \ - NativeFunction(#NAME, DataTypeVector{TYPE()}, int64(), kResultNullIfNull, \ - STRINGIFY(NAME##_##TYPE)) - -// Hash32 functions that : -// - NULL handling is of type NULL_NEVER -// -// The pre-compiled fn name includes the base name & input type name. hash32_int8 -#define HASH32_SAFE_NULL_NEVER(NAME, TYPE) \ - NativeFunction(#NAME, DataTypeVector{TYPE()}, int32(), kResultNullNever, \ - STRINGIFY(NAME##_##TYPE)) - -// Hash32 functions that : -// - NULL handling is of type NULL_NEVER -// -// The pre-compiled fn name includes the base name & input type name. hash32_int8 -#define HASH64_SAFE_NULL_NEVER(NAME, TYPE) \ - NativeFunction(#NAME, DataTypeVector{TYPE()}, int64(), kResultNullNever, \ - STRINGIFY(NAME##_##TYPE)) - -// Hash32 functions with seed that : -// - NULL handling is of type NULL_NEVER -// -// The pre-compiled fn name includes the base name & input type name. hash32WithSeed_int8 -#define HASH32_SEED_SAFE_NULL_NEVER(NAME, TYPE) \ - NativeFunction(#NAME, DataTypeVector{TYPE(), int32()}, int32(), kResultNullNever, \ - STRINGIFY(NAME##WithSeed_##TYPE)) - -// Hash64 functions with seed that : -// - NULL handling is of type NULL_NEVER -// -// The pre-compiled fn name includes the base name & input type name. 
hash32WithSeed_int8 -#define HASH64_SEED_SAFE_NULL_NEVER(NAME, TYPE) \ - NativeFunction(#NAME, DataTypeVector{TYPE(), int64()}, int64(), kResultNullNever, \ - STRINGIFY(NAME##WithSeed_##TYPE)) - -// Iterate the inner macro over all numeric types -#define NUMERIC_TYPES(INNER, NAME) \ - INNER(NAME, int8), INNER(NAME, int16), INNER(NAME, int32), INNER(NAME, int64), \ - INNER(NAME, uint8), INNER(NAME, uint16), INNER(NAME, uint32), INNER(NAME, uint64), \ - INNER(NAME, float32), INNER(NAME, float64) - -// Iterate the inner macro over numeric and date/time types -#define NUMERIC_DATE_TYPES(INNER, NAME) \ - NUMERIC_TYPES(INNER, NAME), DATE_TYPES(INNER, NAME), TIME_TYPES(INNER, NAME) - -// Iterate the inner macro over all date types -#define DATE_TYPES(INNER, NAME) INNER(NAME, date64), INNER(NAME, timestamp) - -// Iterate the inner macro over all time types -#define TIME_TYPES(INNER, NAME) INNER(NAME, time32) - -// Iterate the inner macro over all data types -#define VAR_LEN_TYPES(INNER, NAME) INNER(NAME, utf8), INNER(NAME, binary) - -// Iterate the inner macro over all numeric types, date types and bool type -#define NUMERIC_BOOL_DATE_TYPES(INNER, NAME) \ - NUMERIC_DATE_TYPES(INNER, NAME), INNER(NAME, boolean) - -// Iterate the inner macro over all numeric types, date types, bool and varlen types -#define NUMERIC_BOOL_DATE_VAR_LEN_TYPES(INNER, NAME) \ - NUMERIC_BOOL_DATE_TYPES(INNER, NAME), VAR_LEN_TYPES(INNER, NAME) - -// list of registered native functions. -NativeFunction FunctionRegistry::pc_registry_[] = { - // Arithmetic operations - NUMERIC_TYPES(BINARY_SYMMETRIC_SAFE_NULL_IF_NULL, add), - NUMERIC_TYPES(BINARY_SYMMETRIC_SAFE_NULL_IF_NULL, subtract), - NUMERIC_TYPES(BINARY_SYMMETRIC_SAFE_NULL_IF_NULL, multiply), - NUMERIC_TYPES(BINARY_SYMMETRIC_UNSAFE_NULL_IF_NULL, divide), - BINARY_GENERIC_SAFE_NULL_IF_NULL(mod, int64, int32, int32), - BINARY_GENERIC_SAFE_NULL_IF_NULL(mod, int64, int64, int64), - NUMERIC_BOOL_DATE_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, equal), - NUMERIC_BOOL_DATE_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, not_equal), - NUMERIC_DATE_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, less_than), - NUMERIC_DATE_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, less_than_or_equal_to), - NUMERIC_DATE_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, greater_than), - NUMERIC_DATE_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, greater_than_or_equal_to), - UNARY_SAFE_NULL_IF_NULL(not, boolean, boolean), - - // cast operations - UNARY_SAFE_NULL_IF_NULL(castBIGINT, int32, int64), - UNARY_SAFE_NULL_IF_NULL(castFLOAT4, int32, float32), - UNARY_SAFE_NULL_IF_NULL(castFLOAT4, int64, float32), - UNARY_SAFE_NULL_IF_NULL(castFLOAT8, int32, float64), - UNARY_SAFE_NULL_IF_NULL(castFLOAT8, int64, float64), - UNARY_SAFE_NULL_IF_NULL(castFLOAT8, float32, float64), - UNARY_SAFE_NULL_IF_NULL(castDATE, int64, date64), - - // extended math ops - UNARY_SAFE_NULL_IF_NULL(cbrt, int32, float64), - UNARY_SAFE_NULL_IF_NULL(cbrt, int64, float64), - UNARY_SAFE_NULL_IF_NULL(cbrt, uint32, float64), - UNARY_SAFE_NULL_IF_NULL(cbrt, uint64, float64), - UNARY_SAFE_NULL_IF_NULL(cbrt, float32, float64), - UNARY_SAFE_NULL_IF_NULL(cbrt, float64, float64), - - UNARY_SAFE_NULL_IF_NULL(exp, int32, float64), - UNARY_SAFE_NULL_IF_NULL(exp, int64, float64), - UNARY_SAFE_NULL_IF_NULL(exp, uint32, float64), - UNARY_SAFE_NULL_IF_NULL(exp, uint64, float64), - UNARY_SAFE_NULL_IF_NULL(exp, float32, float64), - UNARY_SAFE_NULL_IF_NULL(exp, float64, float64), - - UNARY_SAFE_NULL_IF_NULL(log, int32, float64), - UNARY_SAFE_NULL_IF_NULL(log, int64, 
float64), - UNARY_SAFE_NULL_IF_NULL(log, uint32, float64), - UNARY_SAFE_NULL_IF_NULL(log, uint64, float64), - UNARY_SAFE_NULL_IF_NULL(log, float32, float64), - UNARY_SAFE_NULL_IF_NULL(log, float64, float64), - - UNARY_SAFE_NULL_IF_NULL(log10, int32, float64), - UNARY_SAFE_NULL_IF_NULL(log10, int64, float64), - UNARY_SAFE_NULL_IF_NULL(log10, uint32, float64), - UNARY_SAFE_NULL_IF_NULL(log10, uint64, float64), - UNARY_SAFE_NULL_IF_NULL(log10, float32, float64), - UNARY_SAFE_NULL_IF_NULL(log10, float64, float64), - - BINARY_UNSAFE_NULL_IF_NULL(log, int32, float64), - BINARY_UNSAFE_NULL_IF_NULL(log, int64, float64), - BINARY_UNSAFE_NULL_IF_NULL(log, uint32, float64), - BINARY_UNSAFE_NULL_IF_NULL(log, uint64, float64), - BINARY_UNSAFE_NULL_IF_NULL(log, float32, float64), - BINARY_UNSAFE_NULL_IF_NULL(log, float64, float64), - - BINARY_SYMMETRIC_SAFE_NULL_IF_NULL(power, float64), - - // nullable never operations - NUMERIC_BOOL_DATE_TYPES(UNARY_SAFE_NULL_NEVER_BOOL, isnull), - NUMERIC_BOOL_DATE_TYPES(UNARY_SAFE_NULL_NEVER_BOOL, isnotnull), - NUMERIC_TYPES(UNARY_SAFE_NULL_NEVER_BOOL, isnumeric), - - // nullable never binary operations - NUMERIC_BOOL_DATE_TYPES(BINARY_SAFE_NULL_NEVER_BOOL, is_distinct_from), - NUMERIC_BOOL_DATE_TYPES(BINARY_SAFE_NULL_NEVER_BOOL, is_not_distinct_from), - - // date/timestamp operations - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractMillennium), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractCentury), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractDecade), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractYear), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractDoy), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractQuarter), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractMonth), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractWeek), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractDow), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractDay), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractHour), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractMinute), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractSecond), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractEpoch), - - BINARY_GENERIC_SAFE_NULL_IF_NULL(months_between, date64, date64, float64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(months_between, timestamp, timestamp, float64), - - // date_trunc operations on date/timestamp - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, date_trunc_Millennium), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, date_trunc_Century), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, date_trunc_Decade), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, date_trunc_Year), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, date_trunc_Quarter), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, date_trunc_Month), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, date_trunc_Week), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, date_trunc_Day), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, date_trunc_Hour), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, date_trunc_Minute), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, date_trunc_Second), - - // time operations - TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractHour), - TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractMinute), - TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractSecond), - - // timestamp diff operations - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampdiffSecond, timestamp, timestamp, int32), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampdiffMinute, timestamp, timestamp, int32), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampdiffHour, timestamp, timestamp, int32), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampdiffDay, timestamp, timestamp, int32), - 
BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampdiffWeek, timestamp, timestamp, int32), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampdiffMonth, timestamp, timestamp, int32), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampdiffQuarter, timestamp, timestamp, int32), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampdiffYear, timestamp, timestamp, int32), - - // timestamp add int32 operations - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddSecond, timestamp, int32, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddMinute, timestamp, int32, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddHour, timestamp, int32, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddDay, timestamp, int32, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddWeek, timestamp, int32, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddMonth, timestamp, int32, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddQuarter, timestamp, int32, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddYear, timestamp, int32, timestamp), - // date add int32 operations - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddSecond, date64, int32, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddMinute, date64, int32, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddHour, date64, int32, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddDay, date64, int32, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddWeek, date64, int32, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddMonth, date64, int32, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddQuarter, date64, int32, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddYear, date64, int32, date64), - - // timestamp add int64 operations - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddSecond, timestamp, int64, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddMinute, timestamp, int64, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddHour, timestamp, int64, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddDay, timestamp, int64, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddWeek, timestamp, int64, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddMonth, timestamp, int64, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddQuarter, timestamp, int64, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddYear, timestamp, int64, timestamp), - // date add int64 operations - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddSecond, date64, int64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddMinute, date64, int64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddHour, date64, int64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddDay, date64, int64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddWeek, date64, int64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddMonth, date64, int64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddQuarter, date64, int64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddYear, date64, int64, date64), - - // date_add(date64, int32), date_add(timestamp, int32) - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_add, date64, int32, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(add, date64, int32, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_add, timestamp, int32, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(add, timestamp, int32, timestamp), - - // date_add(date64, int64), date_add(timestamp, int64) - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_add, date64, int64, date64), - 
BINARY_GENERIC_SAFE_NULL_IF_NULL(add, date64, int64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_add, timestamp, int64, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(add, timestamp, int64, timestamp), - - // date_add(int32, date64), date_add(int32, timestamp) - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_add, int32, date64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(add, int32, date64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_add, int32, timestamp, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(add, int32, timestamp, timestamp), - - // date_add(int64, date64), date_add(int64, timestamp) - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_add, int64, date64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(add, int64, date64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_add, int64, timestamp, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(add, int64, timestamp, timestamp), - - // date_sub(date64, int32), subtract and date_diff - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_sub, date64, int32, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(subtract, date64, int32, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_diff, date64, int32, date64), - // date_sub(timestamp, int32), subtract and date_diff - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_sub, timestamp, int32, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(subtract, timestamp, int32, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_diff, timestamp, int32, date64), - - // date_sub(date64, int64), subtract and date_diff - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_sub, date64, int64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(subtract, date64, int64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_diff, date64, int64, date64), - // date_sub(timestamp, int64), subtract and date_diff - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_sub, timestamp, int64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(subtract, timestamp, int64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_diff, timestamp, int64, date64), - - // hash functions - NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH32_SAFE_NULL_NEVER, hash), - NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH32_SAFE_NULL_NEVER, hash32), - NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH32_SAFE_NULL_NEVER, hash32AsDouble), - NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH32_SEED_SAFE_NULL_NEVER, hash32), - NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH32_SEED_SAFE_NULL_NEVER, hash32AsDouble), - - NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH64_SAFE_NULL_NEVER, hash64), - NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH64_SAFE_NULL_NEVER, hash64AsDouble), - NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH64_SEED_SAFE_NULL_NEVER, hash64), - NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH64_SEED_SAFE_NULL_NEVER, hash64AsDouble), - - // utf8/binary operations - UNARY_SAFE_NULL_IF_NULL(octet_length, utf8, int32), - UNARY_SAFE_NULL_IF_NULL(octet_length, binary, int32), - UNARY_SAFE_NULL_IF_NULL(bit_length, utf8, int32), - UNARY_SAFE_NULL_IF_NULL(bit_length, binary, int32), - UNARY_UNSAFE_NULL_IF_NULL(char_length, utf8, int32), - UNARY_UNSAFE_NULL_IF_NULL(length, utf8, int32), - UNARY_UNSAFE_NULL_IF_NULL(lengthUtf8, binary, int32), - - VAR_LEN_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, equal), - VAR_LEN_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, not_equal), - VAR_LEN_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, less_than), - VAR_LEN_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, less_than_or_equal_to), - VAR_LEN_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, greater_than), - VAR_LEN_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, greater_than_or_equal_to), - - BINARY_RELATIONAL_SAFE_NULL_IF_NULL(starts_with, utf8), - 
BINARY_RELATIONAL_SAFE_NULL_IF_NULL(ends_with, utf8), - - NativeFunction("upper", DataTypeVector{utf8()}, utf8(), kResultNullIfNull, - "upper_utf8", NativeFunction::kNeedsContext), - - NativeFunction("like", DataTypeVector{utf8(), utf8()}, boolean(), kResultNullIfNull, - "gdv_fn_like_utf8_utf8", NativeFunction::kNeedsFunctionHolder), - - NativeFunction("castDATE", DataTypeVector{utf8()}, date64(), kResultNullIfNull, - "castDATE_utf8", - NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), - - NativeFunction("to_date", DataTypeVector{utf8(), utf8(), int32()}, date64(), - kResultNullInternal, "gdv_fn_to_date_utf8_utf8_int32", - NativeFunction::kNeedsContext | NativeFunction::kNeedsFunctionHolder | - NativeFunction::kCanReturnErrors), -}; // namespace gandiva - FunctionRegistry::iterator FunctionRegistry::begin() const { - return std::begin(pc_registry_); + return pc_registry_.data(); } FunctionRegistry::iterator FunctionRegistry::end() const { - return std::end(pc_registry_); + // Avoid dereferencing the end iterator (undefined behavior). + return pc_registry_.data() + pc_registry_.size(); } -FunctionRegistry::SignatureMap FunctionRegistry::pc_registry_map_ = InitPCMap(); +std::vector FunctionRegistry::pc_registry_; -FunctionRegistry::SignatureMap FunctionRegistry::InitPCMap() { +SignatureMap FunctionRegistry::pc_registry_map_ = InitPCMap(); + +SignatureMap FunctionRegistry::InitPCMap() { SignatureMap map; - int num_entries = static_cast(sizeof(pc_registry_) / sizeof(NativeFunction)); - for (int i = 0; i < num_entries; i++) { - const NativeFunction* entry = &pc_registry_[i]; + auto v1 = GetArithmeticFunctionRegistry(); + pc_registry_.insert(std::end(pc_registry_), v1.begin(), v1.end()); + auto v2 = GetDateTimeFunctionRegistry(); + pc_registry_.insert(std::end(pc_registry_), v2.begin(), v2.end()); + + auto v3 = GetHashFunctionRegistry(); + pc_registry_.insert(std::end(pc_registry_), v3.begin(), v3.end()); - DCHECK(map.find(&entry->signature()) == map.end()); - map[&entry->signature()] = entry; - // printf("%s -> %s\n", entry->signature().ToString().c_str(), - // entry->pc_name().c_str()); + auto v4 = GetMathOpsFunctionRegistry(); + pc_registry_.insert(std::end(pc_registry_), v4.begin(), v4.end()); + + auto v5 = GetStringFunctionRegistry(); + pc_registry_.insert(std::end(pc_registry_), v5.begin(), v5.end()); + + auto v6 = GetDateTimeArithmeticFunctionRegistry(); + pc_registry_.insert(std::end(pc_registry_), v6.begin(), v6.end()); + + for (auto& elem : pc_registry_) { + map.insert(std::make_pair(&(elem.signature()), &elem)); } + return map; } const NativeFunction* FunctionRegistry::LookupSignature( const FunctionSignature& signature) const { auto got = pc_registry_map_.find(&signature); - return got == pc_registry_map_.end() ? NULL : got->second; + return got == pc_registry_map_.end() ? nullptr : got->second; } } // namespace gandiva diff --git a/cpp/src/gandiva/function_registry.h b/cpp/src/gandiva/function_registry.h index 0f74089fc6d8e..f7aa3de4bb50a 100644 --- a/cpp/src/gandiva/function_registry.h +++ b/cpp/src/gandiva/function_registry.h @@ -18,15 +18,16 @@ #ifndef GANDIVA_FUNCTION_REGISTRY_H #define GANDIVA_FUNCTION_REGISTRY_H -#include - +#include +#include "gandiva/function_registry_common.h" #include "gandiva/gandiva_aliases.h" #include "gandiva/native_function.h" +#include "gandiva/visibility.h" namespace gandiva { ///\brief Registry of pre-compiled IR functions. 
-class FunctionRegistry { +class GANDIVA_EXPORT FunctionRegistry { public: using iterator = const NativeFunction*; @@ -37,28 +38,9 @@ class FunctionRegistry { iterator end() const; private: - struct KeyHash { - std::size_t operator()(const FunctionSignature* k) const { return k->Hash(); } - }; - - struct KeyEquals { - bool operator()(const FunctionSignature* s1, const FunctionSignature* s2) const { - return *s1 == *s2; - } - }; - - static DataTypePtr time32() { return arrow::time32(arrow::TimeUnit::MILLI); } - - static DataTypePtr time64() { return arrow::time64(arrow::TimeUnit::MICRO); } - - static DataTypePtr timestamp() { return arrow::timestamp(arrow::TimeUnit::MILLI); } - - typedef std::unordered_map - SignatureMap; static SignatureMap InitPCMap(); - static NativeFunction pc_registry_[]; + static std::vector pc_registry_; static SignatureMap pc_registry_map_; }; diff --git a/cpp/src/gandiva/function_registry_arithmetic.cc b/cpp/src/gandiva/function_registry_arithmetic.cc new file mode 100644 index 0000000000000..c5a798cb4e235 --- /dev/null +++ b/cpp/src/gandiva/function_registry_arithmetic.cc @@ -0,0 +1,80 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
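A note on the InitPCMap() change above: the per-category registries are concatenated one vector at a time. The same wiring could be expressed as a loop over the getters; a minimal sketch under the signatures introduced in this patch (MakeFlatRegistry is a hypothetical name, not part of the patch):

    // Hypothetical consolidation: append each per-category registry in one loop.
    using RegistryGetter = std::vector<NativeFunction> (*)();
    static std::vector<NativeFunction> MakeFlatRegistry() {
      std::vector<NativeFunction> all;
      for (RegistryGetter get :
           {&GetArithmeticFunctionRegistry, &GetDateTimeFunctionRegistry,
            &GetHashFunctionRegistry, &GetMathOpsFunctionRegistry,
            &GetStringFunctionRegistry, &GetDateTimeArithmeticFunctionRegistry}) {
        const auto v = get();
        all.insert(all.end(), v.begin(), v.end());
      }
      return all;
    }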
+ +#include "gandiva/function_registry_arithmetic.h" +#include "gandiva/function_registry_common.h" + +namespace gandiva { + +#define BINARY_SYMMETRIC_FN(name) NUMERIC_TYPES(BINARY_SYMMETRIC_SAFE_NULL_IF_NULL, name) + +#define BINARY_RELATIONAL_BOOL_FN(name) \ + NUMERIC_BOOL_DATE_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, name) + +#define BINARY_RELATIONAL_BOOL_DATE_FN(name) \ + NUMERIC_DATE_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, name) + +#define UNARY_OCTET_LEN_FN(name) \ + UNARY_SAFE_NULL_IF_NULL(name, utf8, int32), UNARY_SAFE_NULL_IF_NULL(name, binary, int32) + +#define UNARY_CAST_TO_FLOAT64(name) UNARY_SAFE_NULL_IF_NULL(castFLOAT8, name, float64) + +#define UNARY_CAST_TO_FLOAT32(name) UNARY_SAFE_NULL_IF_NULL(castFLOAT4, name, float32) + +std::vector GetArithmeticFunctionRegistry() { + static std::vector arithmetic_fn_registry_ = { + UNARY_SAFE_NULL_IF_NULL(not, boolean, boolean), + UNARY_SAFE_NULL_IF_NULL(castBIGINT, int32, int64), + + UNARY_CAST_TO_FLOAT32(int32), + UNARY_CAST_TO_FLOAT32(int64), + + UNARY_CAST_TO_FLOAT64(int32), + UNARY_CAST_TO_FLOAT64(int64), + UNARY_CAST_TO_FLOAT64(float32), + + UNARY_SAFE_NULL_IF_NULL(castDATE, int64, date64), + + BINARY_SYMMETRIC_FN(add), + BINARY_SYMMETRIC_FN(subtract), + BINARY_SYMMETRIC_FN(multiply), + + NUMERIC_TYPES(BINARY_SYMMETRIC_UNSAFE_NULL_IF_NULL, divide), + BINARY_GENERIC_SAFE_NULL_IF_NULL(mod, int64, int32, int32), + BINARY_GENERIC_SAFE_NULL_IF_NULL(mod, int64, int64, int64), + + BINARY_SYMMETRIC_SAFE_NULL_IF_NULL(add, decimal128), + + BINARY_RELATIONAL_BOOL_FN(equal), + BINARY_RELATIONAL_BOOL_FN(not_equal), + + BINARY_RELATIONAL_BOOL_DATE_FN(less_than), + BINARY_RELATIONAL_BOOL_DATE_FN(less_than_or_equal_to), + BINARY_RELATIONAL_BOOL_DATE_FN(greater_than), + BINARY_RELATIONAL_BOOL_DATE_FN(greater_than_or_equal_to), + + UNARY_OCTET_LEN_FN(octet_length), + UNARY_OCTET_LEN_FN(bit_length), + + UNARY_UNSAFE_NULL_IF_NULL(char_length, utf8, int32), + UNARY_UNSAFE_NULL_IF_NULL(length, utf8, int32), + UNARY_UNSAFE_NULL_IF_NULL(lengthUtf8, binary, int32)}; + + return arithmetic_fn_registry_; +} + +} // namespace gandiva diff --git a/cpp/src/gandiva/function_registry_arithmetic.h b/cpp/src/gandiva/function_registry_arithmetic.h new file mode 100644 index 0000000000000..e98a4e7b5b1b4 --- /dev/null +++ b/cpp/src/gandiva/function_registry_arithmetic.h @@ -0,0 +1,30 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#ifndef GANDIVA_FUNCTION_REGISTRY_ARITHMETIC_H +#define GANDIVA_FUNCTION_REGISTRY_ARITHMETIC_H + +#include +#include "gandiva/native_function.h" + +namespace gandiva { + +std::vector GetArithmeticFunctionRegistry(); + +} // namespace gandiva + +#endif // GANDIVA_FUNCTION_REGISTRY_ARITHMETIC_H diff --git a/cpp/src/gandiva/function_registry_common.h b/cpp/src/gandiva/function_registry_common.h new file mode 100644 index 0000000000000..3ae065a14769d --- /dev/null +++ b/cpp/src/gandiva/function_registry_common.h @@ -0,0 +1,219 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef GANDIVA_FUNCTION_REGISTRY_COMMON_H +#define GANDIVA_FUNCTION_REGISTRY_COMMON_H + +#include +#include +#include + +#include "gandiva/arrow.h" +#include "gandiva/function_signature.h" +#include "gandiva/gandiva_aliases.h" +#include "gandiva/native_function.h" + +/* This is a private file, intended for internal use by gandiva & must not be included + * directly. + */ +namespace gandiva { + +using arrow::binary; +using arrow::boolean; +using arrow::date64; +using arrow::float32; +using arrow::float64; +using arrow::int16; +using arrow::int32; +using arrow::int64; +using arrow::int8; +using arrow::uint16; +using arrow::uint32; +using arrow::uint64; +using arrow::uint8; +using arrow::utf8; +using std::vector; + +inline DataTypePtr time32() { return arrow::time32(arrow::TimeUnit::MILLI); } + +inline DataTypePtr time64() { return arrow::time64(arrow::TimeUnit::MICRO); } + +inline DataTypePtr timestamp() { return arrow::timestamp(arrow::TimeUnit::MILLI); } +inline DataTypePtr decimal128() { return arrow::decimal(0, 0); } + +struct KeyHash { + std::size_t operator()(const FunctionSignature* k) const { return k->Hash(); } +}; + +struct KeyEquals { + bool operator()(const FunctionSignature* s1, const FunctionSignature* s2) const { + return *s1 == *s2; + } +}; + +typedef std::unordered_map + SignatureMap; + +// Binary functions that : +// - have the same input type for both params +// - output type is same as the input type +// - NULL handling is of type NULL_IF_NULL +// +// The pre-compiled fn name includes the base name & input type names. eg. add_int32_int32 +#define BINARY_SYMMETRIC_SAFE_NULL_IF_NULL(NAME, TYPE) \ + NativeFunction(#NAME, DataTypeVector{TYPE(), TYPE()}, TYPE(), kResultNullIfNull, \ + ARROW_STRINGIFY(NAME##_##TYPE##_##TYPE)) + +// Binary functions that : +// - have the same input type for both params +// - NULL handling is of type NULL_INTERNAL +// - can return error. +// +// The pre-compiled fn name includes the base name & input type names. eg. 
add_int32_int32 +#define BINARY_UNSAFE_NULL_IF_NULL(NAME, IN_TYPE, OUT_TYPE) \ + NativeFunction(#NAME, DataTypeVector{IN_TYPE(), IN_TYPE()}, OUT_TYPE(), \ + kResultNullIfNull, ARROW_STRINGIFY(NAME##_##IN_TYPE##_##IN_TYPE), \ + NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors) + +#define BINARY_SYMMETRIC_UNSAFE_NULL_IF_NULL(NAME, TYPE) \ + BINARY_UNSAFE_NULL_IF_NULL(NAME, TYPE, TYPE) + +// Binary functions that : +// - have different input types, or output type +// - NULL handling is of type NULL_IF_NULL +// +// The pre-compiled fn name includes the base name & input type names. eg. mod_int64_int32 +#define BINARY_GENERIC_SAFE_NULL_IF_NULL(NAME, IN_TYPE1, IN_TYPE2, OUT_TYPE) \ + NativeFunction(#NAME, DataTypeVector{IN_TYPE1(), IN_TYPE2()}, OUT_TYPE(), \ + kResultNullIfNull, ARROW_STRINGIFY(NAME##_##IN_TYPE1##_##IN_TYPE2)) + +// Binary functions that : +// - have the same input type +// - output type is boolean +// - NULL handling is of type NULL_IF_NULL +// +// The pre-compiled fn name includes the base name & input type names. +// eg. equal_int32_int32 +#define BINARY_RELATIONAL_SAFE_NULL_IF_NULL(NAME, TYPE) \ + NativeFunction(#NAME, DataTypeVector{TYPE(), TYPE()}, boolean(), kResultNullIfNull, \ + ARROW_STRINGIFY(NAME##_##TYPE##_##TYPE)) + +// Unary functions that : +// - NULL handling is of type NULL_IF_NULL +// +// The pre-compiled fn name includes the base name & input type name. eg. castFloat_int32 +#define UNARY_SAFE_NULL_IF_NULL(NAME, IN_TYPE, OUT_TYPE) \ + NativeFunction(#NAME, DataTypeVector{IN_TYPE()}, OUT_TYPE(), kResultNullIfNull, \ + ARROW_STRINGIFY(NAME##_##IN_TYPE)) + +// Unary functions that : +// - NULL handling is of type NULL_NEVER +// +// The pre-compiled fn name includes the base name & input type name. eg. isnull_int32 +#define UNARY_SAFE_NULL_NEVER_BOOL(NAME, TYPE) \ + NativeFunction(#NAME, DataTypeVector{TYPE()}, boolean(), kResultNullNever, \ + ARROW_STRINGIFY(NAME##_##TYPE)) + +// Unary functions that : +// - NULL handling is of type NULL_INTERNAL +// +// The pre-compiled fn name includes the base name & input type name. eg. castFloat_int32 +#define UNARY_UNSAFE_NULL_IF_NULL(NAME, IN_TYPE, OUT_TYPE) \ + NativeFunction(#NAME, DataTypeVector{IN_TYPE()}, OUT_TYPE(), kResultNullIfNull, \ + ARROW_STRINGIFY(NAME##_##IN_TYPE), \ + NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors) + +// Binary functions that : +// - NULL handling is of type NULL_NEVER +// +// The pre-compiled fn name includes the base name & input type names, +// eg. is_distinct_from_int32_int32 +#define BINARY_SAFE_NULL_NEVER_BOOL(NAME, TYPE) \ + NativeFunction(#NAME, DataTypeVector{TYPE(), TYPE()}, boolean(), kResultNullNever, \ + ARROW_STRINGIFY(NAME##_##TYPE##_##TYPE)) + +// Extract functions (used with date/time types) that : +// - NULL handling is of type NULL_IF_NULL +// +// The pre-compiled fn name includes the base name & input type name. eg. extractYear_date +#define EXTRACT_SAFE_NULL_IF_NULL(NAME, TYPE) \ + NativeFunction(#NAME, DataTypeVector{TYPE()}, int64(), kResultNullIfNull, \ + ARROW_STRINGIFY(NAME##_##TYPE)) + +// Hash32 functions that : +// - NULL handling is of type NULL_NEVER +// +// The pre-compiled fn name includes the base name & input type name. 
hash32_int8 +#define HASH32_SAFE_NULL_NEVER(NAME, TYPE) \ + NativeFunction(#NAME, DataTypeVector{TYPE()}, int32(), kResultNullNever, \ + ARROW_STRINGIFY(NAME##_##TYPE)) + +// Hash64 functions that : +// - NULL handling is of type NULL_NEVER +// +// The pre-compiled fn name includes the base name & input type name. hash64_int8 +#define HASH64_SAFE_NULL_NEVER(NAME, TYPE) \ + NativeFunction(#NAME, DataTypeVector{TYPE()}, int64(), kResultNullNever, \ + ARROW_STRINGIFY(NAME##_##TYPE)) + +// Hash32 functions with seed that : +// - NULL handling is of type NULL_NEVER +// +// The pre-compiled fn name includes the base name & input type name. hash32WithSeed_int8 +#define HASH32_SEED_SAFE_NULL_NEVER(NAME, TYPE) \ + NativeFunction(#NAME, DataTypeVector{TYPE(), int32()}, int32(), kResultNullNever, \ + ARROW_STRINGIFY(NAME##WithSeed_##TYPE)) + +// Hash64 functions with seed that : +// - NULL handling is of type NULL_NEVER +// +// The pre-compiled fn name includes the base name & input type name. hash64WithSeed_int8 +#define HASH64_SEED_SAFE_NULL_NEVER(NAME, TYPE) \ + NativeFunction(#NAME, DataTypeVector{TYPE(), int64()}, int64(), kResultNullNever, \ + ARROW_STRINGIFY(NAME##WithSeed_##TYPE)) + +// Iterate the inner macro over all numeric types +#define NUMERIC_TYPES(INNER, NAME) \ + INNER(NAME, int8), INNER(NAME, int16), INNER(NAME, int32), INNER(NAME, int64), \ + INNER(NAME, uint8), INNER(NAME, uint16), INNER(NAME, uint32), INNER(NAME, uint64), \ + INNER(NAME, float32), INNER(NAME, float64) + +// Iterate the inner macro over numeric and date/time types +#define NUMERIC_DATE_TYPES(INNER, NAME) \ + NUMERIC_TYPES(INNER, NAME), DATE_TYPES(INNER, NAME), TIME_TYPES(INNER, NAME) + +// Iterate the inner macro over all date types +#define DATE_TYPES(INNER, NAME) INNER(NAME, date64), INNER(NAME, timestamp) + +// Iterate the inner macro over all time types +#define TIME_TYPES(INNER, NAME) INNER(NAME, time32) + +// Iterate the inner macro over all variable-length types +#define VAR_LEN_TYPES(INNER, NAME) INNER(NAME, utf8), INNER(NAME, binary) + +// Iterate the inner macro over all numeric types, date types and bool type +#define NUMERIC_BOOL_DATE_TYPES(INNER, NAME) \ + NUMERIC_DATE_TYPES(INNER, NAME), INNER(NAME, boolean) + +// Iterate the inner macro over all numeric types, date types, bool and varlen types +#define NUMERIC_BOOL_DATE_VAR_LEN_TYPES(INNER, NAME) \ + NUMERIC_BOOL_DATE_TYPES(INNER, NAME), VAR_LEN_TYPES(INNER, NAME) + +} // namespace gandiva + +#endif diff --git a/cpp/src/gandiva/function_registry_datetime.cc b/cpp/src/gandiva/function_registry_datetime.cc new file mode 100644 index 0000000000000..f36e5678c0d73 --- /dev/null +++ b/cpp/src/gandiva/function_registry_datetime.cc @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
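The SignatureMap above keys on FunctionSignature pointers but hashes and compares them by value (via KeyHash and KeyEquals), so callers can probe with the address of a temporary signature. A minimal lookup sketch, assuming the (name, params, return-type) constructor used in expr_validator.cc earlier in this patch:

    #include "gandiva/function_registry.h"

    // Returns nullptr when no pre-compiled function matches the signature.
    const NativeFunction* LookupAddInt32(const FunctionRegistry& registry) {
      FunctionSignature probe("add", DataTypeVector{arrow::int32(), arrow::int32()},
                              arrow::int32());
      return registry.LookupSignature(probe);
    }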
+ +#include "gandiva/function_registry_datetime.h" +#include "gandiva/function_registry_common.h" + +namespace gandiva { + +#define DATE_EXTRACTION_FNS(name) \ + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Millennium), \ + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Century), \ + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Decade), \ + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Year), \ + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Quarter), \ + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Month), \ + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Week), \ + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Day), \ + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Hour), \ + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Minute), \ + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Second) + +#define TIME_EXTRACTION_FNS(name) \ + TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Hour), \ + TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Minute), \ + TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Second) + +std::vector GetDateTimeFunctionRegistry() { + static std::vector date_time_fn_registry_ = { + DATE_EXTRACTION_FNS(extract), + DATE_EXTRACTION_FNS(date_trunc_), + + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractDoy), + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractDow), + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractEpoch), + + TIME_EXTRACTION_FNS(extract), + + NativeFunction("castDATE", DataTypeVector{utf8()}, date64(), kResultNullIfNull, + "castDATE_utf8", + NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), + + NativeFunction("castTIMESTAMP", DataTypeVector{utf8()}, timestamp(), + kResultNullIfNull, "castTIMESTAMP_utf8", + NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), + + NativeFunction("to_date", DataTypeVector{utf8(), utf8(), int32()}, date64(), + kResultNullInternal, "gdv_fn_to_date_utf8_utf8_int32", + NativeFunction::kNeedsContext | + NativeFunction::kNeedsFunctionHolder | + NativeFunction::kCanReturnErrors)}; + + return date_time_fn_registry_; +} + +} // namespace gandiva diff --git a/cpp/src/gandiva/function_registry_datetime.h b/cpp/src/gandiva/function_registry_datetime.h new file mode 100644 index 0000000000000..c9b88942215d8 --- /dev/null +++ b/cpp/src/gandiva/function_registry_datetime.h @@ -0,0 +1,30 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#ifndef GANDIVA_FUNCTION_REGISTRY_DATE_TIME_H +#define GANDIVA_FUNCTION_REGISTRY_DATE_TIME_H + +#include <vector> +#include "gandiva/native_function.h" + +namespace gandiva { + +std::vector<NativeFunction> GetDateTimeFunctionRegistry(); + +} // namespace gandiva + +#endif // GANDIVA_FUNCTION_REGISTRY_DATE_TIME_H diff --git a/cpp/src/gandiva/function_registry_hash.cc b/cpp/src/gandiva/function_registry_hash.cc new file mode 100644 index 0000000000000..a163a230eaca3 --- /dev/null +++ b/cpp/src/gandiva/function_registry_hash.cc @@ -0,0 +1,53 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "gandiva/function_registry_hash.h" +#include "gandiva/function_registry_common.h" + +namespace gandiva { + +#define HASH32_SAFE_NULL_NEVER_FN(name) \ + NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH32_SAFE_NULL_NEVER, name) + +#define HASH32_SEED_SAFE_NULL_NEVER_FN(name) \ + NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH32_SEED_SAFE_NULL_NEVER, name) + +#define HASH64_SAFE_NULL_NEVER_FN(name) \ + NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH64_SAFE_NULL_NEVER, name) + +#define HASH64_SEED_SAFE_NULL_NEVER_FN(name) \ + NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH64_SEED_SAFE_NULL_NEVER, name) + +std::vector<NativeFunction> GetHashFunctionRegistry() { + static std::vector<NativeFunction> hash_fn_registry_ = { + HASH32_SAFE_NULL_NEVER_FN(hash), + HASH32_SAFE_NULL_NEVER_FN(hash32), + HASH32_SAFE_NULL_NEVER_FN(hash32AsDouble), + + HASH32_SEED_SAFE_NULL_NEVER_FN(hash32), + HASH32_SEED_SAFE_NULL_NEVER_FN(hash32AsDouble), + + HASH64_SAFE_NULL_NEVER_FN(hash64), + HASH64_SAFE_NULL_NEVER_FN(hash64AsDouble), + + HASH64_SEED_SAFE_NULL_NEVER_FN(hash64), + HASH64_SEED_SAFE_NULL_NEVER_FN(hash64AsDouble)}; + + return hash_fn_registry_; +} + +} // namespace gandiva diff --git a/cpp/src/gandiva/function_registry_hash.h b/cpp/src/gandiva/function_registry_hash.h new file mode 100644 index 0000000000000..dc02cb21e37b5 --- /dev/null +++ b/cpp/src/gandiva/function_registry_hash.h @@ -0,0 +1,30 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License.
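Editor's note: the seeded hash variants registered above differ from the plain ones only in taking the seed as a trailing argument and in the pre-compiled symbol they bind to. Hand-expanded for illustration (same assumptions as the earlier sketch):

```cpp
#include "gandiva/function_registry_common.h"

namespace gandiva {
// HASH32_SEED_SAFE_NULL_NEVER(hash32, int8) expands to:
//   NativeFunction("hash32", DataTypeVector{int8(), int32()}, int32(),
//                  kResultNullNever, "hash32WithSeed_int8")
// kResultNullNever means the output is always valid: a null input hashes to a
// fixed value rather than propagating null.
static const NativeFunction kHash32SeedInt8 =
    HASH32_SEED_SAFE_NULL_NEVER(hash32, int8);
}  // namespace gandiva
```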
+ +#ifndef GANDIVA_FUNCTION_REGISTRY_HASH_H +#define GANDIVA_FUNCTION_REGISTRY_HASH_H + +#include <vector> +#include "gandiva/native_function.h" + +namespace gandiva { + +std::vector<NativeFunction> GetHashFunctionRegistry(); + +} // namespace gandiva + +#endif // GANDIVA_FUNCTION_REGISTRY_HASH_H diff --git a/cpp/src/gandiva/function_registry_math_ops.cc b/cpp/src/gandiva/function_registry_math_ops.cc new file mode 100644 index 0000000000000..31b4b13119a86 --- /dev/null +++ b/cpp/src/gandiva/function_registry_math_ops.cc @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "gandiva/function_registry_math_ops.h" +#include "gandiva/function_registry_common.h" + +namespace gandiva { + +#define MATH_UNARY_OPS(name) \ + UNARY_SAFE_NULL_IF_NULL(name, int32, float64), \ + UNARY_SAFE_NULL_IF_NULL(name, int64, float64), \ + UNARY_SAFE_NULL_IF_NULL(name, uint32, float64), \ + UNARY_SAFE_NULL_IF_NULL(name, uint64, float64), \ + UNARY_SAFE_NULL_IF_NULL(name, float32, float64), \ + UNARY_SAFE_NULL_IF_NULL(name, float64, float64) + +#define MATH_BINARY_UNSAFE(name) \ + BINARY_UNSAFE_NULL_IF_NULL(name, int32, float64), \ + BINARY_UNSAFE_NULL_IF_NULL(name, int64, float64), \ + BINARY_UNSAFE_NULL_IF_NULL(name, uint32, float64), \ + BINARY_UNSAFE_NULL_IF_NULL(name, uint64, float64), \ + BINARY_UNSAFE_NULL_IF_NULL(name, float32, float64), \ + BINARY_UNSAFE_NULL_IF_NULL(name, float64, float64) + +#define UNARY_SAFE_NULL_NEVER_BOOL_FN(name) \ + NUMERIC_BOOL_DATE_TYPES(UNARY_SAFE_NULL_NEVER_BOOL, name) + +#define BINARY_SAFE_NULL_NEVER_BOOL_FN(name) \ + NUMERIC_BOOL_DATE_TYPES(BINARY_SAFE_NULL_NEVER_BOOL, name) + +std::vector<NativeFunction> GetMathOpsFunctionRegistry() { + static std::vector<NativeFunction> math_fn_registry_ = { + MATH_UNARY_OPS(cbrt), + MATH_UNARY_OPS(exp), + MATH_UNARY_OPS(log), + MATH_UNARY_OPS(log10), + + MATH_BINARY_UNSAFE(log), + + BINARY_SYMMETRIC_SAFE_NULL_IF_NULL(power, float64), + + UNARY_SAFE_NULL_NEVER_BOOL_FN(isnull), + UNARY_SAFE_NULL_NEVER_BOOL_FN(isnotnull), + + NUMERIC_TYPES(UNARY_SAFE_NULL_NEVER_BOOL, isnumeric), + + BINARY_SAFE_NULL_NEVER_BOOL_FN(is_distinct_from), + BINARY_SAFE_NULL_NEVER_BOOL_FN(is_not_distinct_from)}; + + return math_fn_registry_; +} + +} // namespace gandiva diff --git a/cpp/src/gandiva/function_registry_math_ops.h b/cpp/src/gandiva/function_registry_math_ops.h new file mode 100644 index 0000000000000..0204ffc8809ac --- /dev/null +++ b/cpp/src/gandiva/function_registry_math_ops.h @@ -0,0 +1,30 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership.
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef GANDIVA_FUNCTION_REGISTRY_MATHOPS_H +#define GANDIVA_FUNCTION_REGISTRY_MATHOPS_H + +#include <vector> +#include "gandiva/native_function.h" + +namespace gandiva { + +std::vector<NativeFunction> GetMathOpsFunctionRegistry(); + +} // namespace gandiva + +#endif // GANDIVA_FUNCTION_REGISTRY_MATHOPS_H diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc new file mode 100644 index 0000000000000..c97925af9cbb3 --- /dev/null +++ b/cpp/src/gandiva/function_registry_string.cc @@ -0,0 +1,50 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "gandiva/function_registry_string.h" +#include "gandiva/function_registry_common.h" + +namespace gandiva { + +#define BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(name) \ + VAR_LEN_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, name) + +#define BINARY_RELATIONAL_SAFE_NULL_IF_NULL_UTF8_FN(name) \ + BINARY_RELATIONAL_SAFE_NULL_IF_NULL(name, utf8) + +std::vector<NativeFunction> GetStringFunctionRegistry() { + static std::vector<NativeFunction> string_fn_registry_ = { + BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(equal), + BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(not_equal), + BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(less_than), + BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(less_than_or_equal_to), + BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(greater_than), + BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(greater_than_or_equal_to), + + BINARY_RELATIONAL_SAFE_NULL_IF_NULL_UTF8_FN(starts_with), + BINARY_RELATIONAL_SAFE_NULL_IF_NULL_UTF8_FN(ends_with), + + NativeFunction("upper", DataTypeVector{utf8()}, utf8(), kResultNullIfNull, + "upper_utf8", NativeFunction::kNeedsContext), + + NativeFunction("like", DataTypeVector{utf8(), utf8()}, boolean(), kResultNullIfNull, + "gdv_fn_like_utf8_utf8", NativeFunction::kNeedsFunctionHolder)}; + + return string_fn_registry_; +} + +} // namespace gandiva diff --git a/cpp/src/gandiva/function_registry_string.h b/cpp/src/gandiva/function_registry_string.h new file mode 100644 index 0000000000000..c9217893e5c0b --- /dev/null +++ b/cpp/src/gandiva/function_registry_string.h @@ -0,0 +1,30 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements.
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef GANDIVA_FUNCTION_REGISTRY_STRING_H +#define GANDIVA_FUNCTION_REGISTRY_STRING_H + +#include <vector> +#include "gandiva/native_function.h" + +namespace gandiva { + +std::vector<NativeFunction> GetStringFunctionRegistry(); + +} // namespace gandiva + +#endif // GANDIVA_FUNCTION_REGISTRY_STRING_H diff --git a/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc b/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc new file mode 100644 index 0000000000000..7af76909b7d8f --- /dev/null +++ b/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License.
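Editor's note: the "like" entry in the string registry above is the one function in this group flagged kNeedsFunctionHolder, meaning the pattern argument is compiled once into a holder object instead of being re-parsed per row. A usage sketch based on the LikeHolder API that appears further down in this patch (the operator() signature is inferred from the tests):

```cpp
#include <memory>

#include "gandiva/like_holder.h"

// LikeHolder::Make converts the SQL pattern to a PCRE regex once; operator()
// then evaluates each row value against the compiled RE2.
void LikeHolderExample() {
  std::shared_ptr<gandiva::LikeHolder> holder;
  auto status = gandiva::LikeHolder::Make("abc%", &holder);
  if (status.ok()) {
    bool m1 = (*holder)("abcdef");  // true: SQL '%' matches any suffix
    bool m2 = (*holder)("xabc");    // false: the pattern is anchored
    (void)m1;
    (void)m2;
  }
}
```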
+ +#include "gandiva/function_registry_timestamp_arithmetic.h" +#include "gandiva/function_registry_common.h" + +namespace gandiva { + +#define TIMESTAMP_ADD_FNS(name) \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, timestamp, int32, timestamp), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, date64, int32, date64), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, timestamp, int64, timestamp), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, date64, int64, date64) + +#define TIMESTAMP_DIFF_FN(name) \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, timestamp, timestamp, int32) + +#define DATE_ADD_FNS(name) \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, date64, int32, date64), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, timestamp, int32, timestamp), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, date64, int64, date64), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, timestamp, int64, timestamp), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int32, date64, date64), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int32, timestamp, timestamp), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int64, date64, date64), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int64, timestamp, timestamp) + +#define DATE_DIFF_FNS(name) \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, date64, int32, date64), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, timestamp, int32, date64), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, date64, int64, date64), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, timestamp, int64, date64) + +std::vector GetDateTimeArithmeticFunctionRegistry() { + static std::vector datetime_fn_registry_ = { + BINARY_GENERIC_SAFE_NULL_IF_NULL(months_between, date64, date64, float64), + BINARY_GENERIC_SAFE_NULL_IF_NULL(months_between, timestamp, timestamp, float64), + + TIMESTAMP_DIFF_FN(timestampdiffSecond), + TIMESTAMP_DIFF_FN(timestampdiffMinute), + TIMESTAMP_DIFF_FN(timestampdiffHour), + TIMESTAMP_DIFF_FN(timestampdiffDay), + TIMESTAMP_DIFF_FN(timestampdiffWeek), + TIMESTAMP_DIFF_FN(timestampdiffMonth), + TIMESTAMP_DIFF_FN(timestampdiffQuarter), + TIMESTAMP_DIFF_FN(timestampdiffYear), + + TIMESTAMP_ADD_FNS(timestampaddSecond), + TIMESTAMP_ADD_FNS(timestampaddMinute), + TIMESTAMP_ADD_FNS(timestampaddHour), + TIMESTAMP_ADD_FNS(timestampaddDay), + TIMESTAMP_ADD_FNS(timestampaddWeek), + TIMESTAMP_ADD_FNS(timestampaddMonth), + TIMESTAMP_ADD_FNS(timestampaddQuarter), + TIMESTAMP_ADD_FNS(timestampaddYear), + + DATE_ADD_FNS(date_add), + DATE_ADD_FNS(add), + + DATE_DIFF_FNS(date_sub), + DATE_DIFF_FNS(subtract), + DATE_DIFF_FNS(date_diff)}; + + return datetime_fn_registry_; +} + +} // namespace gandiva diff --git a/cpp/src/gandiva/function_registry_timestamp_arithmetic.h b/cpp/src/gandiva/function_registry_timestamp_arithmetic.h new file mode 100644 index 0000000000000..f1b97093663ba --- /dev/null +++ b/cpp/src/gandiva/function_registry_timestamp_arithmetic.h @@ -0,0 +1,30 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef GANDIVA_FUNCTION_REGISTRY_TIMESTAMP_ARITHMETIC_H +#define GANDIVA_FUNCTION_REGISTRY_TIMESTAMP_ARITHMETIC_H + +#include <vector> +#include "gandiva/native_function.h" + +namespace gandiva { + +std::vector<NativeFunction> GetDateTimeArithmeticFunctionRegistry(); + +} // namespace gandiva + +#endif // GANDIVA_FUNCTION_REGISTRY_TIMESTAMP_ARITHMETIC_H diff --git a/cpp/src/gandiva/function_signature.h b/cpp/src/gandiva/function_signature.h index e5dff245b158f..a5015ce43ec75 100644 --- a/cpp/src/gandiva/function_signature.h +++ b/cpp/src/gandiva/function_signature.h @@ -24,12 +24,13 @@ #include "gandiva/arrow.h" #include "gandiva/logging.h" +#include "gandiva/visibility.h" namespace gandiva { /// \brief Signature for a function : includes the base name, input param types and /// output types. -class FunctionSignature { +class GANDIVA_EXPORT FunctionSignature { public: FunctionSignature(const std::string& base_name, const DataTypeVector& param_types, DataTypePtr ret_type) @@ -56,10 +57,22 @@ class FunctionSignature { std::string ToString() const; private: - // TODO : for some of the types, this shouldn't match type specific data. eg. for - // decimals, this shouldn't match precision/scale. bool DataTypeEquals(const DataTypePtr left, const DataTypePtr right) const { - return left->Equals(right); + if (left->id() == right->id()) { + switch (left->id()) { + case arrow::Type::DECIMAL: { + // For decimal types, the precision/scale isn't part of the signature. + auto dleft = arrow::internal::checked_cast<arrow::DecimalType*>(left.get()); + auto dright = arrow::internal::checked_cast<arrow::DecimalType*>(right.get()); + return (dleft != NULL) && (dright != NULL) && + (dleft->byte_width() == dright->byte_width()); + } + default: + return left->Equals(right); + } + } else { + return false; + } } std::string base_name_; diff --git a/cpp/src/gandiva/gdv_function_stubs.h b/cpp/src/gandiva/gdv_function_stubs.h index 154e80b8feecc..8f940cee0f46f 100644 --- a/cpp/src/gandiva/gdv_function_stubs.h +++ b/cpp/src/gandiva/gdv_function_stubs.h @@ -43,6 +43,9 @@ bool in_expr_lookup_int32(int64_t ptr, int32_t value, bool in_validity); bool in_expr_lookup_int64(int64_t ptr, int64_t value, bool in_validity); bool in_expr_lookup_utf8(int64_t ptr, const char* data, int data_len, bool in_validity); + +int gdv_fn_time_with_zone(int* time_fields, const char* zone, int zone_len, + int64_t* ret_time); } #endif // GDV_FUNCTION_STUBS_H diff --git a/cpp/src/gandiva/jni/CMakeLists.txt b/cpp/src/gandiva/jni/CMakeLists.txt index 8684fe8723de3..a07d3903a75ac 100644 --- a/cpp/src/gandiva/jni/CMakeLists.txt +++ b/cpp/src/gandiva/jni/CMakeLists.txt @@ -15,7 +15,9 @@ # specific language governing permissions and limitations # under the License.
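Editor's note: the DataTypeEquals change above is what lets one registry entry serve every decimal(precision, scale) combination. A sketch of the intended behavior; it assumes FunctionSignature's equality delegates to DataTypeEquals, which the surrounding class strongly suggests but this hunk does not show:

```cpp
#include "arrow/type.h"
#include "gandiva/function_signature.h"

// Two signatures differing only in decimal precision/scale now match: both
// parameter types are 16-byte decimal128, and only the byte width is compared
// for arrow::Type::DECIMAL.
void SignatureMatchingExample() {
  gandiva::FunctionSignature wide(
      "add", {arrow::decimal(38, 10), arrow::decimal(38, 10)}, arrow::decimal(38, 10));
  gandiva::FunctionSignature narrow(
      "add", {arrow::decimal(10, 2), arrow::decimal(10, 2)}, arrow::decimal(10, 2));
  // Expected: wide and narrow compare equal, so a single pre-compiled
  // "add" implementation handles both.
}
```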
-project(gandiva_jni) +if(CMAKE_VERSION VERSION_LESS 3.11) + message(FATAL_ERROR "Building the Gandiva JNI bindings requires CMake version >= 3.11") +endif() # Find JNI find_package(JNI REQUIRED) @@ -61,6 +63,7 @@ set(GANDIVA_JNI_SOURCES config_builder.cc # cpp/src ADD_ARROW_LIB(gandiva_jni SOURCES ${GANDIVA_JNI_SOURCES} + OUTPUTS GANDIVA_JNI_LIBRARIES SHARED_PRIVATE_LINK_LIBS ${GANDIVA_LINK_LIBS} STATIC_LINK_LIBS ${GANDIVA_LINK_LIBS} DEPENDENCIES gandiva_java gandiva_jni_proto @@ -69,6 +72,8 @@ ADD_ARROW_LIB(gandiva_jni ${JNI_HEADERS_DIR} PRIVATE_INCLUDES ${JNI_INCLUDE_DIRS} ${CMAKE_CURRENT_BINARY_DIR}) +add_dependencies(gandiva ${GANDIVA_JNI_LIBRARIES}) + # filter out everything that is not needed for the jni bridge # statically linked stdc++ has conflicts with stdc++ loaded by other libraries. if (NOT APPLE) diff --git a/cpp/src/gandiva/jni/expression_registry_helper.cc b/cpp/src/gandiva/jni/expression_registry_helper.cc index 5227329db472a..b5c6880a25cf1 100644 --- a/cpp/src/gandiva/jni/expression_registry_helper.cc +++ b/cpp/src/gandiva/jni/expression_registry_helper.cc @@ -121,10 +121,15 @@ void ArrowToProtobuf(DataTypePtr type, types::ExtGandivaType* gandiva_data_type) case arrow::Type::type::NA: gandiva_data_type->set_type(types::GandivaType::NONE); break; + case arrow::Type::type::DECIMAL: { + gandiva_data_type->set_type(types::GandivaType::DECIMAL); + gandiva_data_type->set_precision(0); + gandiva_data_type->set_scale(0); + break; + } case arrow::Type::type::FIXED_SIZE_BINARY: case arrow::Type::type::MAP: case arrow::Type::type::INTERVAL: - case arrow::Type::type::DECIMAL: case arrow::Type::type::LIST: case arrow::Type::type::STRUCT: case arrow::Type::type::UNION: diff --git a/cpp/src/gandiva/jni/jni_common.cc b/cpp/src/gandiva/jni/jni_common.cc index 639ad361f4a8a..339b0cd4e5a9d 100644 --- a/cpp/src/gandiva/jni/jni_common.cc +++ b/cpp/src/gandiva/jni/jni_common.cc @@ -381,6 +381,12 @@ NodePtr ProtoTypeToNode(const types::TreeNode& node) { return TreeExprBuilder::MakeBinaryLiteral(node.binarynode().value()); } + if (node.has_decimalnode()) { + std::string value = node.decimalnode().value(); + gandiva::DecimalScalar128 literal(value, node.decimalnode().precision(), + node.decimalnode().scale()); + return TreeExprBuilder::MakeDecimalLiteral(literal); + } std::cerr << "Unknown node type in protobuf\n"; return nullptr; } diff --git a/cpp/src/gandiva/jni/symbols.map b/cpp/src/gandiva/jni/symbols.map index a387ae1f5af6b..e0f5def41f00e 100644 --- a/cpp/src/gandiva/jni/symbols.map +++ b/cpp/src/gandiva/jni/symbols.map @@ -15,6 +15,6 @@ # specific language governing permissions and limitations # under the License. { - global: extern "C++" { gandiva*; Java*; JNI*; }; + global: extern "C++" { gandiva*; }; Java*; JNI*; local: *; }; diff --git a/cpp/src/gandiva/like_holder.cc b/cpp/src/gandiva/like_holder.cc index d659b22c46e34..f4bbc512e5d6a 100644 --- a/cpp/src/gandiva/like_holder.cc +++ b/cpp/src/gandiva/like_holder.cc @@ -50,39 +50,40 @@ const FunctionNode LikeHolder::TryOptimize(const FunctionNode& node) { } } - // didn't hit any of the optimisation paths. return original. + // Could not optimize, return original node. 
return node; } +static bool IsArrowStringLiteral(arrow::Type::type type) { + return type == arrow::Type::STRING || type == arrow::Type::BINARY; +} + Status LikeHolder::Make(const FunctionNode& node, std::shared_ptr<LikeHolder>* holder) { - if (node.children().size() != 2) { - return Status::Invalid("'like' function requires two parameters"); - } + ARROW_RETURN_IF(node.children().size() != 2, + Status::Invalid("'like' function requires two parameters")); auto literal = dynamic_cast<LiteralNode*>(node.children().at(1).get()); - if (literal == nullptr) { - return Status::Invalid("'like' function requires a literal as the second parameter"); - } + ARROW_RETURN_IF( + literal == nullptr, + Status::Invalid("'like' function requires a literal as the second parameter")); auto literal_type = literal->return_type()->id(); - if (literal_type != arrow::Type::STRING && literal_type != arrow::Type::BINARY) { - return Status::Invalid( - "'like' function requires a string literal as the second parameter"); - } - auto pattern = boost::get<std::string>(literal->holder()); - return Make(pattern, holder); + ARROW_RETURN_IF( + !IsArrowStringLiteral(literal_type), + Status::Invalid( + "'like' function requires a string literal as the second parameter")); + + return Make(literal->holder().get<std::string>(), holder); } Status LikeHolder::Make(const std::string& sql_pattern, std::shared_ptr<LikeHolder>* holder) { std::string pcre_pattern; - auto status = RegexUtil::SqlLikePatternToPcre(sql_pattern, pcre_pattern); - ARROW_RETURN_NOT_OK(status); + ARROW_RETURN_NOT_OK(RegexUtil::SqlLikePatternToPcre(sql_pattern, pcre_pattern)); auto lholder = std::shared_ptr<LikeHolder>(new LikeHolder(pcre_pattern)); - if (!lholder->regex_.ok()) { - return Status::Invalid("building re2 regex failed for pattern " + pcre_pattern); - } + ARROW_RETURN_IF(!lholder->regex_.ok(), + Status::Invalid("Building RE2 pattern '", pcre_pattern, "' failed")); *holder = lholder; return Status::OK(); diff --git a/cpp/src/gandiva/like_holder.h b/cpp/src/gandiva/like_holder.h index 23ed367e8ccf7..eab30bf732fa4 100644 --- a/cpp/src/gandiva/like_holder.h +++ b/cpp/src/gandiva/like_holder.h @@ -22,14 +22,17 @@ #include #include + #include "arrow/status.h" + #include "gandiva/function_holder.h" #include "gandiva/node.h" +#include "gandiva/visibility.h" namespace gandiva { /// Function Holder for SQL 'like' -class LikeHolder : public FunctionHolder { +class GANDIVA_EXPORT LikeHolder : public FunctionHolder { public: ~LikeHolder() override = default; diff --git a/cpp/src/gandiva/like_holder_test.cc b/cpp/src/gandiva/like_holder_test.cc index 3e3cd37c4fed1..d0ce8bb595021 100644 --- a/cpp/src/gandiva/like_holder_test.cc +++ b/cpp/src/gandiva/like_holder_test.cc @@ -84,6 +84,16 @@ TEST_F(TestLikeHolder, TestRegexEscape) { EXPECT_EQ(res, "%hello_abc.def#"); } +TEST_F(TestLikeHolder, TestDot) { + std::shared_ptr<LikeHolder> like_holder; + + auto status = LikeHolder::Make("abc.", &like_holder); + EXPECT_EQ(status.ok(), true) << status.message(); + + auto& like = *like_holder; + EXPECT_FALSE(like("abcd")); +} + TEST_F(TestLikeHolder, TestOptimise) { // optimise for 'starts_with' auto fnode = LikeHolder::TryOptimize(BuildLike("xy 123z%")); diff --git a/cpp/src/gandiva/literal_holder.h b/cpp/src/gandiva/literal_holder.h index 0a65ea2c3e249..36afdd3c874e2 100644 --- a/cpp/src/gandiva/literal_holder.h +++ b/cpp/src/gandiva/literal_holder.h @@ -20,14 +20,16 @@ #include -#include +#include + +#include +#include "gandiva/decimal_scalar.h" namespace gandiva { using LiteralHolder = - boost::variant<bool, float, double, uint8_t, uint16_t, uint32_t, uint64_t, int8_t, int16_t, int32_t, int64_t, std::string>; - + arrow::util::variant<bool, float, double, uint8_t, uint16_t, uint32_t, uint64_t, int8_t, int16_t, int32_t, int64_t, std::string, DecimalScalar128>; } // namespace gandiva #endif // GANDIVA_LITERAL_HOLDER
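Editor's note: with LiteralHolder migrated from boost::variant to arrow's vendored variant (and extended with DecimalScalar128), literal access in the code below goes through the variant's get&lt;T&gt;() member. A tiny standalone sketch; it assumes the vendored variant exposes the same get&lt;T&gt;() member the generator code uses:

```cpp
#include <cstdint>

#include "gandiva/literal_holder.h"

namespace gandiva {
// Typed access into the variant: get<T>() with the wrong T is an error, so
// callers switch on the arrow type id first (see Visit(LiteralDex) below).
void LiteralHolderExample() {
  LiteralHolder h(int32_t{42});
  int32_t v = h.get<int32_t>();  // 42
  (void)v;
}
}  // namespace gandiva
```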
diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc index 82d0386cfb9f3..c6844cfe5dd8a 100644 --- a/cpp/src/gandiva/llvm_generator.cc +++ b/cpp/src/gandiva/llvm_generator.cc @@ -44,10 +44,10 @@ LLVMGenerator::LLVMGenerator() Status LLVMGenerator::Make(std::shared_ptr<Configuration> config, std::unique_ptr<LLVMGenerator>* llvm_generator) { std::unique_ptr<LLVMGenerator> llvmgen_obj(new LLVMGenerator()); - Status status = Engine::Make(config, &(llvmgen_obj->engine_)); - ARROW_RETURN_NOT_OK(status); + ARROW_RETURN_NOT_OK(Engine::Make(config, &(llvmgen_obj->engine_))); *llvm_generator = std::move(llvmgen_obj); + return Status::OK(); } @@ -57,33 +57,29 @@ Status LLVMGenerator::Add(const ExpressionPtr expr, const FieldDescriptorPtr out // decompose the expression to separate out value and validities. ExprDecomposer decomposer(function_registry_, annotator_); ValueValidityPairPtr value_validity; - auto status = decomposer.Decompose(*expr->root(), &value_validity); - ARROW_RETURN_NOT_OK(status); + ARROW_RETURN_NOT_OK(decomposer.Decompose(*expr->root(), &value_validity)); // Generate the IR function for the decomposed expression. llvm::Function* ir_function = nullptr; - status = CodeGenExprValue(value_validity->value_expr(), output, idx, &ir_function); - ARROW_RETURN_NOT_OK(status); + ARROW_RETURN_NOT_OK( + CodeGenExprValue(value_validity->value_expr(), output, idx, &ir_function)); std::unique_ptr<CompiledExpr> compiled_expr( new CompiledExpr(value_validity, output, ir_function)); compiled_exprs_.push_back(std::move(compiled_expr)); + return Status::OK(); } /// Build and optimise module for projection expression. Status LLVMGenerator::Build(const ExpressionVector& exprs) { - Status status; - for (auto& expr : exprs) { auto output = annotator_.AddOutputFieldDescriptor(expr->result()); - status = Add(expr, output); - ARROW_RETURN_NOT_OK(status); + ARROW_RETURN_NOT_OK(Add(expr, output)); } - // optimise, compile and finalize the module - status = engine_->FinalizeModule(optimise_ir_, dump_ir_); - ARROW_RETURN_NOT_OK(status); + // Optimize, compile and finalize the module + ARROW_RETURN_NOT_OK(engine_->FinalizeModule(optimise_ir_, dump_ir_)); // setup the jit functions for each expression. for (auto& compiled_expr : compiled_exprs_) { @@ -91,6 +87,7 @@ Status LLVMGenerator::Build(const ExpressionVector& exprs) { EvalFunc fn = reinterpret_cast<EvalFunc>(engine_->CompiledFunction(ir_func)); compiled_expr->set_jit_function(fn); } + return Status::OK(); } @@ -107,13 +104,15 @@ Status LLVMGenerator::Execute(const arrow::RecordBatch& record_batch, EvalFunc jit_function = compiled_expr->jit_function(); jit_function(eval_batch->GetBufferArray(), eval_batch->GetLocalBitMapArray(), (int64_t)eval_batch->GetExecutionContext(), record_batch.num_rows()); - // check for execution errors - if (eval_batch->GetExecutionContext()->has_error()) { - return Status::ExecutionError(eval_batch->GetExecutionContext()->get_error()); - } + + ARROW_RETURN_IF( + eval_batch->GetExecutionContext()->has_error(), + Status::ExecutionError(eval_batch->GetExecutionContext()->get_error())); + // generate validity vectors.
ComputeBitMapsForExpr(*compiled_expr, *eval_batch); } + return Status::OK(); } @@ -233,8 +232,8 @@ Status LLVMGenerator::CodeGenExprValue(DexPtr value_expr, FieldDescriptorPtr out engine_->AddFunctionToCompile(func_name); *fn = llvm::Function::Create(prototype, llvm::GlobalValue::ExternalLinkage, func_name, module()); - ARROW_RETURN_FAILURE_IF_FALSE((*fn != nullptr), - Status::CodeGenError("Error creating function.")); + ARROW_RETURN_IF((*fn == nullptr), Status::CodeGenError("Error creating function.")); + // Name the arguments llvm::Function::arg_iterator args = (*fn)->arg_begin(); llvm::Value* arg_addrs = &*args; @@ -396,9 +395,21 @@ llvm::Value* LLVMGenerator::AddFunctionCall(const std::string& full_name, value = ir_builder()->CreateCall(fn, args, full_name); DCHECK(value->getType() == ret_type); } + return value; } +std::shared_ptr<DecimalLValue> LLVMGenerator::BuildDecimalLValue(llvm::Value* value, + DataTypePtr arrow_type) { + // only decimals of size 128-bit supported. + DCHECK(is_decimal_128(arrow_type)); + auto decimal_type = + arrow::internal::checked_cast<arrow::DecimalType*>(arrow_type.get()); + return std::make_shared<DecimalLValue>(value, nullptr, + types()->i32_constant(decimal_type->precision()), + types()->i32_constant(decimal_type->scale())); +} + #define ADD_VISITOR_TRACE(...) \ if (generator_->enable_ir_traces_) { \ generator_->AddTrace(__VA_ARGS__); \ @@ -422,20 +433,33 @@ LLVMGenerator::Visitor::Visitor(LLVMGenerator* generator, llvm::Function* functi void LLVMGenerator::Visitor::Visit(const VectorReadFixedLenValueDex& dex) { llvm::IRBuilder<>* builder = ir_builder(); - llvm::Value* slot_ref = GetBufferReference(dex.DataIdx(), kBufferTypeData, dex.Field()); - llvm::Value* slot_value; - if (dex.FieldType()->id() == arrow::Type::BOOL) { - slot_value = generator_->GetPackedBitValue(slot_ref, loop_var_); - } else { - llvm::Value* slot_offset = builder->CreateGEP(slot_ref, loop_var_); - slot_value = builder->CreateLoad(slot_offset, dex.FieldName()); - } + std::shared_ptr<LValue> lvalue; + switch (dex.FieldType()->id()) { + case arrow::Type::BOOL: + slot_value = generator_->GetPackedBitValue(slot_ref, loop_var_); + lvalue = std::make_shared<LValue>(slot_value); + break; + + case arrow::Type::DECIMAL: { + auto slot_offset = builder->CreateGEP(slot_ref, loop_var_); + slot_value = builder->CreateLoad(slot_offset, dex.FieldName()); + lvalue = generator_->BuildDecimalLValue(slot_value, dex.FieldType()); + break; + } + + default: { + auto slot_offset = builder->CreateGEP(slot_ref, loop_var_); + slot_value = builder->CreateLoad(slot_offset, dex.FieldName()); + lvalue = std::make_shared<LValue>(slot_value); + break; + } + } ADD_VISITOR_TRACE("visit fixed-len data vector " + dex.FieldName() + " value %T", slot_value); - result_.reset(new LValue(slot_value)); + result_ = lvalue; } void LLVMGenerator::Visitor::Visit(const VectorReadVarLenValueDex& dex) { @@ -503,52 +527,52 @@ void LLVMGenerator::Visitor::Visit(const LiteralDex& dex) { switch (dex.type()->id()) { case arrow::Type::BOOL: - value = types->i1_constant(boost::get<bool>(dex.holder())); + value = types->i1_constant(dex.holder().get<bool>()); break; case arrow::Type::UINT8: - value = types->i8_constant(boost::get<uint8_t>(dex.holder())); + value = types->i8_constant(dex.holder().get<uint8_t>()); break; case arrow::Type::UINT16: - value = types->i16_constant(boost::get<uint16_t>(dex.holder())); + value = types->i16_constant(dex.holder().get<uint16_t>()); break; case arrow::Type::UINT32: - value = types->i32_constant(boost::get<uint32_t>(dex.holder())); + value = types->i32_constant(dex.holder().get<uint32_t>()); break; case arrow::Type::UINT64: - value =
types->i64_constant(boost::get<uint64_t>(dex.holder())); + value = types->i64_constant(dex.holder().get<uint64_t>()); break; case arrow::Type::INT8: - value = types->i8_constant(boost::get<int8_t>(dex.holder())); + value = types->i8_constant(dex.holder().get<int8_t>()); break; case arrow::Type::INT16: - value = types->i16_constant(boost::get<int16_t>(dex.holder())); + value = types->i16_constant(dex.holder().get<int16_t>()); break; case arrow::Type::INT32: - value = types->i32_constant(boost::get<int32_t>(dex.holder())); + value = types->i32_constant(dex.holder().get<int32_t>()); break; case arrow::Type::INT64: - value = types->i64_constant(boost::get<int64_t>(dex.holder())); + value = types->i64_constant(dex.holder().get<int64_t>()); break; case arrow::Type::FLOAT: - value = types->float_constant(boost::get<float>(dex.holder())); + value = types->float_constant(dex.holder().get<float>()); break; case arrow::Type::DOUBLE: - value = types->double_constant(boost::get<double>(dex.holder())); + value = types->double_constant(dex.holder().get<double>()); break; case arrow::Type::STRING: case arrow::Type::BINARY: { - const std::string& str = boost::get<std::string>(dex.holder()); + const std::string& str = dex.holder().get<std::string>(); llvm::Constant* str_int_cast = types->i64_constant((int64_t)str.c_str()); value = llvm::ConstantExpr::getIntToPtr(str_int_cast, types->i8_ptr_type()); @@ -557,21 +581,36 @@ } case arrow::Type::DATE64: - value = types->i64_constant(boost::get<int64_t>(dex.holder())); + value = types->i64_constant(dex.holder().get<int64_t>()); break; case arrow::Type::TIME32: - value = types->i32_constant(boost::get<int32_t>(dex.holder())); + value = types->i32_constant(dex.holder().get<int32_t>()); break; case arrow::Type::TIME64: - value = types->i64_constant(boost::get<int64_t>(dex.holder())); + value = types->i64_constant(dex.holder().get<int64_t>()); break; case arrow::Type::TIMESTAMP: - value = types->i64_constant(boost::get<int64_t>(dex.holder())); + value = types->i64_constant(dex.holder().get<int64_t>()); break; + case arrow::Type::DECIMAL: { + // build code for struct + auto scalar = dex.holder().get<DecimalScalar128>(); + // ConstantInt doesn't have a get method that takes an int128 or a pair of + // int64s, so pass the string representation instead. + auto int128_value = + llvm::ConstantInt::get(llvm::Type::getInt128Ty(*generator_->context()), + Decimal128(scalar.value()).ToIntegerString(), 10); + auto type = arrow::decimal(scalar.precision(), scalar.scale()); + auto lvalue = generator_->BuildDecimalLValue(int128_value, type); + // set it as the l-value and return. + result_ = lvalue; + return; + } + default: DCHECK(0); } @@ -589,13 +628,14 @@ void LLVMGenerator::Visitor::Visit(const NonNullableFuncDex& dex) { auto params = BuildParams(dex.function_holder().get(), dex.args(), false, native_function->NeedsContext()); + auto arrow_return_type = dex.func_descriptor()->return_type(); if (native_function->CanReturnErrors()) { // slow path : if a function can return errors, skip invoking the function // unless all of the input args are valid. Otherwise, it can cause spurious errors. llvm::IRBuilder<>* builder = ir_builder(); LLVMTypes* types = generator_->types(); - auto arrow_type_id = native_function->signature().ret_type()->id(); + auto arrow_type_id = arrow_return_type->id(); auto result_type = types->IRType(arrow_type_id); // Build combined validity of the args.
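Editor's note: the DECIMAL literal case above routes the 128-bit value through its base-10 string form, per the patch's own comment that ConstantInt has no getter for an int128 or a pair of int64s. Isolated as a helper for clarity; this is a sketch, not code from the patch, though both API calls it makes appear in the hunk above:

```cpp
#include "arrow/util/decimal.h"
#include "gandiva/llvm_includes.h"

// Build an i128 LLVM constant from an arrow Decimal128 by parsing its
// decimal-string representation, mirroring the DECIMAL literal case above.
llvm::Constant* MakeInt128Constant(llvm::LLVMContext& ctx,
                                   const arrow::Decimal128& value) {
  return llvm::ConstantInt::get(llvm::Type::getInt128Ty(ctx),
                                value.ToIntegerString(), /*Radix=*/10);
}
```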
@@ -609,7 +649,7 @@ void LLVMGenerator::Visitor::Visit(const NonNullableFuncDex& dex) { auto then_lambda = [&] { ADD_VISITOR_TRACE("fn " + function_name + " can return errors : all args valid, invoke fn"); - return BuildFunctionCall(native_function, &params); + return BuildFunctionCall(native_function, arrow_return_type, &params); }; // else block @@ -624,10 +664,10 @@ return std::make_shared<LValue>(else_value, else_value_len); }; - result_ = BuildIfElse(is_valid, then_lambda, else_lambda, result_type); + result_ = BuildIfElse(is_valid, then_lambda, else_lambda, arrow_return_type); } else { // fast path : invoke function without computing validities. - result_ = BuildFunctionCall(native_function, &params); + result_ = BuildFunctionCall(native_function, arrow_return_type, &params); } } @@ -639,7 +679,8 @@ void LLVMGenerator::Visitor::Visit(const NullableNeverFuncDex& dex) { auto params = BuildParams(dex.function_holder().get(), dex.args(), true, native_function->NeedsContext()); - result_ = BuildFunctionCall(native_function, &params); + auto arrow_return_type = dex.func_descriptor()->return_type(); + result_ = BuildFunctionCall(native_function, arrow_return_type, &params); } void LLVMGenerator::Visitor::Visit(const NullableInternalFuncDex& dex) { @@ -659,7 +700,8 @@ new llvm::AllocaInst(types->i8_type(), 0, "result_valid", entry_block_); params.push_back(result_valid_ptr); - result_ = BuildFunctionCall(native_function, &params); + auto arrow_return_type = dex.func_descriptor()->return_type(); + result_ = BuildFunctionCall(native_function, arrow_return_type, &params); // load the result validity and truncate to i1. llvm::Value* result_valid_i8 = builder->CreateLoad(result_valid_ptr); @@ -672,7 +714,6 @@ void LLVMGenerator::Visitor::Visit(const IfDex& dex) { ADD_VISITOR_TRACE("visit IfExpression"); llvm::IRBuilder<>* builder = ir_builder(); - LLVMTypes* types = generator_->types(); // Evaluate condition. LValuePtr if_condition = BuildValueAndValidity(dex.condition_vv()); @@ -714,9 +755,8 @@ }; // build the if-else condition. - auto result_type = types->IRType(dex.result_type()->id()); - result_ = BuildIfElse(validAndMatched, then_lambda, else_lambda, result_type); + result_ = BuildIfElse(validAndMatched, then_lambda, else_lambda, dex.result_type()); - if (result_type == types->i8_ptr_type()) { + if (arrow::is_binary_like(dex.result_type()->id())) { ADD_VISITOR_TRACE("IfElse result length %T", result_->length()); } ADD_VISITOR_TRACE("IfElse result value %T", result_->data()); @@ -906,7 +946,7 @@ void LLVMGenerator::Visitor::VisitInExpression(const InExprDexBase<Type>& dex) { LValuePtr LLVMGenerator::Visitor::BuildIfElse(llvm::Value* condition, std::function<LValuePtr()> then_func, std::function<LValuePtr()> else_func, - llvm::Type* result_type) { + DataTypePtr result_type) { llvm::IRBuilder<>* builder = ir_builder(); llvm::LLVMContext* context = generator_->context(); LLVMTypes* types = generator_->types(); @@ -936,17 +976,31 @@ LValuePtr LLVMGenerator::Visitor::BuildIfElse(llvm::Value* condition, // Emit the merge block.
builder->SetInsertPoint(merge_bb); - llvm::PHINode* result_value = builder->CreatePHI(result_type, 2, "res_value"); + auto llvm_type = types->IRType(result_type->id()); + llvm::PHINode* result_value = builder->CreatePHI(llvm_type, 2, "res_value"); result_value->addIncoming(then_lvalue->data(), then_bb); result_value->addIncoming(else_lvalue->data(), else_bb); - llvm::PHINode* result_length = nullptr; - if (result_type == types->i8_ptr_type()) { - result_length = builder->CreatePHI(types->i32_type(), 2, "res_length"); - result_length->addIncoming(then_lvalue->length(), then_bb); - result_length->addIncoming(else_lvalue->length(), else_bb); + LValuePtr ret; + switch (result_type->id()) { + case arrow::Type::STRING: { + llvm::PHINode* result_length; + result_length = builder->CreatePHI(types->i32_type(), 2, "res_length"); + result_length->addIncoming(then_lvalue->length(), then_bb); + result_length->addIncoming(else_lvalue->length(), else_bb); + ret = std::make_shared<LValue>(result_value, result_length); + break; + } + + case arrow::Type::DECIMAL: + ret = generator_->BuildDecimalLValue(result_value, result_type); + break; + + default: + ret = std::make_shared<LValue>(result_value); + break; } - return std::make_shared<LValue>(result_value, result_length); + return ret; } LValuePtr LLVMGenerator::Visitor::BuildValueAndValidity(const ValueValidityPair& pair) { @@ -963,25 +1017,46 @@ } LValuePtr LLVMGenerator::Visitor::BuildFunctionCall(const NativeFunction* func, + DataTypePtr arrow_return_type, std::vector<llvm::Value*>* params) { - auto arrow_return_type = func->signature().ret_type()->id(); - auto llvm_return_type = generator_->types()->IRType(arrow_return_type); - - // add extra arg for return length for variable len return types (alloced on stack). - llvm::AllocaInst* result_len_ptr = nullptr; - if (arrow::is_binary_like(arrow_return_type)) { - result_len_ptr = new llvm::AllocaInst(generator_->types()->i32_type(), 0, - "result_len", entry_block_); - params->push_back(result_len_ptr); - has_arena_allocs_ = true; - } + auto types = generator_->types(); + auto arrow_return_type_id = arrow_return_type->id(); + auto llvm_return_type = types->IRType(arrow_return_type_id); + + if (arrow_return_type_id == arrow::Type::DECIMAL) { + // For decimal fns, the output precision/scale are passed along as parameters. + // + // convert from this : + // out = add_decimal(v1, p1, s1, v2, p2, s2) + // to: + // out = add_decimal(v1, p1, s1, v2, p2, s2, out_p, out_s) + + // Append the out_precision and out_scale + auto ret_lvalue = generator_->BuildDecimalLValue(nullptr, arrow_return_type); + params->push_back(ret_lvalue->precision()); + params->push_back(ret_lvalue->scale()); + + // Make the function call + auto out = generator_->AddFunctionCall(func->pc_name(), llvm_return_type, *params); + ret_lvalue->set_data(out); + return std::move(ret_lvalue); + } else { + // add extra arg for return length for variable len return types (alloced on stack). + llvm::AllocaInst* result_len_ptr = nullptr; + if (arrow::is_binary_like(arrow_return_type_id)) { + result_len_ptr = new llvm::AllocaInst(generator_->types()->i32_type(), 0, + "result_len", entry_block_); + params->push_back(result_len_ptr); + has_arena_allocs_ = true; + } - // Make the function call - llvm::IRBuilder<>* builder = ir_builder(); - auto value = generator_->AddFunctionCall(func->pc_name(), llvm_return_type, *params); - auto value_len = - (result_len_ptr == nullptr) ?
nullptr : builder->CreateLoad(result_len_ptr); - return std::make_shared<LValue>(value, value_len); + // Make the function call + llvm::IRBuilder<>* builder = ir_builder(); + auto value = generator_->AddFunctionCall(func->pc_name(), llvm_return_type, *params); + auto value_len = + (result_len_ptr == nullptr) ? nullptr : builder->CreateLoad(result_len_ptr); + return std::make_shared<LValue>(value, value_len); + } } std::vector<llvm::Value*> LLVMGenerator::Visitor::BuildParams( @@ -1007,12 +1082,9 @@ DexPtr value_expr = pair->value_expr(); value_expr->Accept(*this); LValue& result_ref = *result(); - params.push_back(result_ref.data()); - // build length (for var len data types) - if (result_ref.length() != nullptr) { - params.push_back(result_ref.length()); - } + // append all the parameters corresponding to this LValue. + result_ref.AppendFunctionParams(&params); // build validity. diff --git a/cpp/src/gandiva/llvm_generator.h b/cpp/src/gandiva/llvm_generator.h index 49f209d280d13..2c1d5c10194ac 100644 --- a/cpp/src/gandiva/llvm_generator.h +++ b/cpp/src/gandiva/llvm_generator.h @@ -36,13 +36,14 @@ #include "gandiva/llvm_types.h" #include "gandiva/lvalue.h" #include "gandiva/value_validity_pair.h" +#include "gandiva/visibility.h" namespace gandiva { class FunctionHolder; /// Builds an LLVM module and generates code for the specified set of expressions. -class LLVMGenerator { +class GANDIVA_EXPORT LLVMGenerator { public: /// \brief Factory method to initialize the generator. static Status Make(std::shared_ptr<Configuration> config, @@ -119,12 +120,13 @@ bool with_validity, bool with_context); // Generate code to invoke a function call. - LValuePtr BuildFunctionCall(const NativeFunction* func, + LValuePtr BuildFunctionCall(const NativeFunction* func, DataTypePtr arrow_return_type, std::vector<llvm::Value*>* params); // Generate code for an if-else condition. LValuePtr BuildIfElse(llvm::Value* condition, std::function<LValuePtr()> then_func, - std::function<LValuePtr()> else_func, llvm::Type* result_type); + std::function<LValuePtr()> else_func, + DataTypePtr arrow_return_type); // Switch to the entry_block and get reference of the validity/value/offsets buffer llvm::Value* GetBufferReference(int idx, BufferType buffer_type, FieldPtr field); @@ -184,6 +186,10 @@ void ClearPackedBitValueIfFalse(llvm::Value* bitmap, llvm::Value* position, llvm::Value* value); + // Generate code to build a DecimalLValue with specified value/precision/scale. + std::shared_ptr<DecimalLValue> BuildDecimalLValue(llvm::Value* value, + DataTypePtr arrow_type); + /// Generate code to make a function call (to a pre-compiled IR function) which takes /// 'args' and has a return type 'ret_type'. llvm::Value* AddFunctionCall(const std::string& full_name, llvm::Type* ret_type, diff --git a/cpp/src/gandiva/llvm_generator_test.cc b/cpp/src/gandiva/llvm_generator_test.cc index 818c7912150a9..fed6339314850 100644 --- a/cpp/src/gandiva/llvm_generator_test.cc +++ b/cpp/src/gandiva/llvm_generator_test.cc @@ -26,6 +26,7 @@ #include "gandiva/expression.h" #include "gandiva/func_descriptor.h" #include "gandiva/function_registry.h" +#include "gandiva/tests/test_util.h" namespace gandiva { @@ -39,8 +40,7 @@ class TestLLVMGenerator : public ::testing::Test { // Verify that a valid pc function exists for every function in the registry.
TEST_F(TestLLVMGenerator, VerifyPCFunctions) { std::unique_ptr<LLVMGenerator> generator; - Status status = - LLVMGenerator::Make(ConfigurationBuilder::DefaultConfiguration(), &generator); + auto status = LLVMGenerator::Make(TestConfiguration(), &generator); EXPECT_TRUE(status.ok()) << status.message(); llvm::Module* module = generator->module(); @@ -54,8 +54,7 @@ TEST_F(TestLLVMGenerator, VerifyPCFunctions) { TEST_F(TestLLVMGenerator, TestAdd) { // Setup LLVM generator to do an arithmetic add of two vectors std::unique_ptr<LLVMGenerator> generator; - Status status = - LLVMGenerator::Make(ConfigurationBuilder::DefaultConfiguration(), &generator); + auto status = LLVMGenerator::Make(TestConfiguration(), &generator); EXPECT_TRUE(status.ok()); Annotator annotator; diff --git a/cpp/src/gandiva/llvm_includes.h b/cpp/src/gandiva/llvm_includes.h new file mode 100644 index 0000000000000..9de1f45a0ad61 --- /dev/null +++ b/cpp/src/gandiva/llvm_includes.h @@ -0,0 +1,37 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4141) +#pragma warning(disable : 4146) +#pragma warning(disable : 4244) +#pragma warning(disable : 4267) +#pragma warning(disable : 4291) +#pragma warning(disable : 4624) +#endif + +#include +#include +#include +#include + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif diff --git a/cpp/src/gandiva/llvm_types.cc b/cpp/src/gandiva/llvm_types.cc index 0b89d96e3fb02..18ff627a5651f 100644 --- a/cpp/src/gandiva/llvm_types.cc +++ b/cpp/src/gandiva/llvm_types.cc @@ -40,6 +40,7 @@ LLVMTypes::LLVMTypes(llvm::LLVMContext& context) : context_(context) { {arrow::Type::type::TIMESTAMP, i64_type()}, {arrow::Type::type::STRING, i8_ptr_type()}, {arrow::Type::type::BINARY, i8_ptr_type()}, + {arrow::Type::type::DECIMAL, i128_type()}, }; } diff --git a/cpp/src/gandiva/llvm_types.h b/cpp/src/gandiva/llvm_types.h index dab47d059f7f2..2629d326c3590 100644 --- a/cpp/src/gandiva/llvm_types.h +++ b/cpp/src/gandiva/llvm_types.h @@ -21,15 +21,15 @@ #include #include -#include -#include #include "gandiva/arrow.h" +#include "gandiva/llvm_includes.h" #include "gandiva/logging.h" +#include "gandiva/visibility.h" namespace gandiva { /// \brief Holder for llvm types, and mappings between arrow types and llvm types.
-class LLVMTypes { +class GANDIVA_EXPORT LLVMTypes { public: explicit LLVMTypes(llvm::LLVMContext& context); @@ -43,6 +43,8 @@ class LLVMTypes { llvm::Type* i64_type() { return llvm::Type::getInt64Ty(context_); } + llvm::Type* i128_type() { return llvm::Type::getInt128Ty(context_); } + llvm::Type* float_type() { return llvm::Type::getFloatTy(context_); } llvm::Type* double_type() { return llvm::Type::getDoubleTy(context_); } @@ -53,12 +55,19 @@ class LLVMTypes { llvm::PointerType* i64_ptr_type() { return llvm::PointerType::get(i64_type(), 0); } - llvm::PointerType* ptr_type(llvm::Type* base_type) { - return llvm::PointerType::get(base_type, 0); + llvm::PointerType* i128_ptr_type() { return llvm::PointerType::get(i128_type(), 0); } + + llvm::StructType* i128_split_type() { + // struct with high/low bits (see decimal_ops.cc:DecimalSplit) + return llvm::StructType::get(context_, {i64_type(), i64_type()}, false); } llvm::Type* void_type() { return llvm::Type::getVoidTy(context_); } + llvm::PointerType* ptr_type(llvm::Type* base_type) { + return llvm::PointerType::get(base_type, 0); + } + llvm::Constant* true_constant() { return llvm::ConstantInt::get(context_, llvm::APInt(1, 1)); } @@ -87,6 +96,18 @@ return llvm::ConstantInt::get(context_, llvm::APInt(64, val)); } + llvm::Constant* i128_constant(int64_t val) { + return llvm::ConstantInt::get(context_, llvm::APInt(128, val)); + } + + llvm::Constant* i128_zero() { + return llvm::ConstantInt::get(context_, llvm::APInt(128, 0)); + } + + llvm::Constant* i128_one() { + return llvm::ConstantInt::get(context_, llvm::APInt(128, 1)); + } + llvm::Constant* float_constant(float val) { return llvm::ConstantFP::get(float_type(), val); } diff --git a/cpp/src/gandiva/local_bitmaps_holder.h b/cpp/src/gandiva/local_bitmaps_holder.h index 1dc82562e3110..ae0ba53e99003 100644 --- a/cpp/src/gandiva/local_bitmaps_holder.h +++ b/cpp/src/gandiva/local_bitmaps_holder.h @@ -50,10 +50,10 @@ class LocalBitMapsHolder { int64_t num_records_; /// A container of 'local_bitmaps_', each sized to accommodate 'num_records'. - std::vector<std::unique_ptr<uint8_t>> local_bitmaps_vec_; + std::vector<std::unique_ptr<uint8_t[]>> local_bitmaps_vec_; /// An array of the local bitmaps. - std::unique_ptr<uint8_t*> local_bitmaps_array_; + std::unique_ptr<uint8_t*[]> local_bitmaps_array_; int64_t local_bitmap_size_; }; @@ -72,7 +72,7 @@ inline LocalBitMapsHolder::LocalBitMapsHolder(int64_t num_records, int num_local // Alloc 'num_local_bitmaps_' number of bitmaps, each of capacity 'num_records_'. for (int i = 0; i < num_local_bitmaps; ++i) { // TODO : round-up to a slab friendly multiple. - std::unique_ptr<uint8_t> bitmap(new uint8_t[local_bitmap_size_]); + std::unique_ptr<uint8_t[]> bitmap(new uint8_t[local_bitmap_size_]); // keep pointer to the bitmap in the array. (local_bitmaps_array_.get())[i] = bitmap.get(); diff --git a/cpp/src/gandiva/lru_cache_test.cc b/cpp/src/gandiva/lru_cache_test.cc index 230a811fc1b31..8ac04c3461b7e 100644 --- a/cpp/src/gandiva/lru_cache_test.cc +++ b/cpp/src/gandiva/lru_cache_test.cc @@ -59,6 +59,6 @@ TEST_F(TestLruCache, TestLruBehavior) { cache_.get(TestCacheKey(1)); cache_.insert(TestCacheKey(3), "hello"); // should have evicted key 2.
- ASSERT_EQ(cache_.get(TestCacheKey(1)).value(), "hello"); + ASSERT_EQ(*cache_.get(TestCacheKey(1)), "hello"); } } // namespace gandiva diff --git a/cpp/src/gandiva/lvalue.h b/cpp/src/gandiva/lvalue.h index 2ff03dcdd9c56..6c9814cd63017 100644 --- a/cpp/src/gandiva/lvalue.h +++ b/cpp/src/gandiva/lvalue.h @@ -18,29 +18,63 @@ #ifndef GANDIVA_LVALUE_H #define GANDIVA_LVALUE_H +#include <vector> + #include "arrow/util/macros.h" -#include +#include "gandiva/llvm_includes.h" +#include "gandiva/logging.h" namespace gandiva { /// \brief Tracks validity/value builders in LLVM. -class LValue { +class GANDIVA_EXPORT LValue { public: explicit LValue(llvm::Value* data, llvm::Value* length = NULLPTR, llvm::Value* validity = NULLPTR) : data_(data), length_(length), validity_(validity) {} + virtual ~LValue() = default; llvm::Value* data() { return data_; } llvm::Value* length() { return length_; } llvm::Value* validity() { return validity_; } + void set_data(llvm::Value* data) { data_ = data; } + + // Append the params required when passing this as a function parameter. + virtual void AppendFunctionParams(std::vector<llvm::Value*>* params) { + params->push_back(data_); + if (length_ != NULLPTR) { + params->push_back(length_); + } + } + private: llvm::Value* data_; llvm::Value* length_; llvm::Value* validity_; }; +class GANDIVA_EXPORT DecimalLValue : public LValue { + public: + DecimalLValue(llvm::Value* data, llvm::Value* validity, llvm::Value* precision, + llvm::Value* scale) + : LValue(data, NULLPTR, validity), precision_(precision), scale_(scale) {} + + llvm::Value* precision() { return precision_; } + llvm::Value* scale() { return scale_; } + + void AppendFunctionParams(std::vector<llvm::Value*>* params) override { + LValue::AppendFunctionParams(params); + params->push_back(precision_); + params->push_back(scale_); + } + + private: + llvm::Value* precision_; + llvm::Value* scale_; +}; + } // namespace gandiva #endif // GANDIVA_LVALUE_H diff --git a/cpp/src/gandiva/native_function.h b/cpp/src/gandiva/native_function.h index 7a250e01cb619..82714c7de9f61 100644 --- a/cpp/src/gandiva/native_function.h +++ b/cpp/src/gandiva/native_function.h @@ -23,6 +23,7 @@ #include #include "gandiva/function_signature.h" +#include "gandiva/visibility.h" namespace gandiva { @@ -37,7 +38,7 @@ enum ResultNullableType { /// \brief Holder for the mapping from a function in an expression to a /// precompiled function. -class NativeFunction { +class GANDIVA_EXPORT NativeFunction { public: // function attributes. static constexpr int32_t kNeedsContext = (1 << 1); @@ -52,7 +53,6 @@ class NativeFunction { bool NeedsFunctionHolder() const { return (flags_ & kNeedsFunctionHolder) != 0; } bool CanReturnErrors() const { return (flags_ & kCanReturnErrors) != 0; } - private: NativeFunction(const std::string& base_name, const DataTypeVector& param_types, DataTypePtr ret_type, const ResultNullableType& result_nullable_type, const std::string& pc_name, int32_t flags = 0) : signature_(base_name, param_types, ret_type), flags_(flags), result_nullable_type_(result_nullable_type), pc_name_(pc_name) {} + private: FunctionSignature signature_; /// attributes int32_t flags_; const ResultNullableType result_nullable_type_; /// pre-compiled function name.
std::string pc_name_; - - friend class FunctionRegistry; }; } // end namespace gandiva diff --git a/cpp/src/gandiva/node.h b/cpp/src/gandiva/node.h index d31924aa73017..ca51123994a0e 100644 --- a/cpp/src/gandiva/node.h +++ b/cpp/src/gandiva/node.h @@ -30,12 +30,13 @@ #include "gandiva/gandiva_aliases.h" #include "gandiva/literal_holder.h" #include "gandiva/node_visitor.h" +#include "gandiva/visibility.h" namespace gandiva { /// \brief Represents a node in the expression tree. Validity and value are /// in a joined state. -class Node { +class GANDIVA_EXPORT Node { public: explicit Node(DataTypePtr return_type) : return_type_(return_type) {} @@ -53,7 +54,7 @@ class Node { }; /// \brief Node in the expression tree, representing a literal. -class LiteralNode : public Node { +class GANDIVA_EXPORT LiteralNode : public Node { public: LiteralNode(DataTypePtr type, const LiteralHolder& holder, bool is_null) : Node(type), holder_(holder), is_null_(is_null) {} @@ -76,12 +77,12 @@ class LiteralNode : public Node { // The default formatter prints in decimal, which can cause a loss in precision. So, // print in hex. Can't use hexfloat since gcc 4.9 doesn't support it. if (return_type()->id() == arrow::Type::DOUBLE) { - double dvalue = boost::get<double>(holder_); + double dvalue = holder_.get<double>(); uint64_t bits; memcpy(&bits, &dvalue, sizeof(bits)); ss << " raw(" << std::hex << bits << ")"; } else if (return_type()->id() == arrow::Type::FLOAT) { - float fvalue = boost::get<float>(holder_); + float fvalue = holder_.get<float>(); uint32_t bits; memcpy(&bits, &fvalue, sizeof(bits)); ss << " raw(" << std::hex << bits << ")"; @@ -95,7 +96,7 @@ }; /// \brief Node in the expression tree, representing an arrow field. -class FieldNode : public Node { +class GANDIVA_EXPORT FieldNode : public Node { public: explicit FieldNode(FieldPtr field) : Node(field->type()), field_(field) {} @@ -112,7 +113,7 @@ }; /// \brief Node in the expression tree, representing a function. -class FunctionNode : public Node { +class GANDIVA_EXPORT FunctionNode : public Node { public: FunctionNode(const std::string& name, const NodeVector& children, DataTypePtr retType); @@ -154,7 +155,7 @@ inline FunctionNode::FunctionNode(const std::string& name, const NodeVector& chi } /// \brief Node in the expression tree, representing an if-else expression. -class IfNode : public Node { +class GANDIVA_EXPORT IfNode : public Node { public: IfNode(NodePtr condition, NodePtr then_node, NodePtr else_node, DataTypePtr result_type) : Node(result_type), @@ -183,7 +184,7 @@ }; /// \brief Node in the expression tree, representing an and/or boolean expression. -class BooleanNode : public Node { +class GANDIVA_EXPORT BooleanNode : public Node { public: enum ExprType : char { AND, OR }; diff --git a/cpp/src/gandiva/node_visitor.h b/cpp/src/gandiva/node_visitor.h index ba3645a58969f..27d05649b8ec5 100644 --- a/cpp/src/gandiva/node_visitor.h +++ b/cpp/src/gandiva/node_visitor.h @@ -23,6 +23,7 @@ #include "arrow/status.h" #include "gandiva/logging.h" +#include "gandiva/visibility.h" namespace gandiva { @@ -35,7 +36,7 @@ template <typename Type> class InExpressionNode; /// \brief Visitor for nodes in the expression tree.
-class NodeVisitor { +class GANDIVA_EXPORT NodeVisitor { public: virtual ~NodeVisitor() = default; diff --git a/cpp/src/gandiva/precompiled/CMakeLists.txt b/cpp/src/gandiva/precompiled/CMakeLists.txt index a4414cae0fc86..5c40a6c28af80 100644 --- a/cpp/src/gandiva/precompiled/CMakeLists.txt +++ b/cpp/src/gandiva/precompiled/CMakeLists.txt @@ -20,12 +20,27 @@ project(gandiva) set(PRECOMPILED_SRCS arithmetic_ops.cc bitmap.cc + decimal_ops.cc + decimal_wrapper.cc extended_math_ops.cc hash.cc print.cc string_ops.cc time.cc - timestamp_arithmetic.cc) + timestamp_arithmetic.cc + ../../arrow/util/basic_decimal.cc) + +if (MSVC) + # clang pretends to be a particular version of MSVC. Version 1900 is + # Visual Studio 2015, and the standard library uses C++14 features, + # so we have to use that -std version to get the IR compilation to + # work + set(PLATFORM_CLANG_OPTIONS + -std=c++14 -fms-compatibility -fms-compatibility-version=19) +else() + set(PLATFORM_CLANG_OPTIONS + -std=c++11) +endif() # Create bitcode for each of the source files. foreach(SRC_FILE ${PRECOMPILED_SRCS}) @@ -35,7 +50,14 @@ foreach(SRC_FILE ${PRECOMPILED_SRCS}) add_custom_command( OUTPUT ${BC_FILE} COMMAND ${CLANG_EXECUTABLE} - -std=c++11 -emit-llvm -O2 -c ${ABSOLUTE_SRC} -o ${BC_FILE} + ${PLATFORM_CLANG_OPTIONS} + -DGANDIVA_IR + -DNDEBUG # DCHECK macros not implemented in precompiled code + -DARROW_STATIC # Do not set __declspec(dllimport) on MSVC on Arrow symbols + -DGANDIVA_STATIC # Do not set __declspec(dllimport) on MSVC on Gandiva symbols + -fno-use-cxa-atexit # Workaround for unresolved __dso_handle + -emit-llvm -O3 -c ${ABSOLUTE_SRC} -o ${BC_FILE} + ${ARROW_GANDIVA_PC_CXX_FLAGS} -I${CMAKE_SOURCE_DIR}/src DEPENDS ${SRC_FILE}) list(APPEND BC_FILES ${BC_FILE}) @@ -51,11 +73,36 @@ add_custom_command( add_custom_target(precompiled ALL DEPENDS ${GANDIVA_BC_OUTPUT_PATH}) +function(add_precompiled_unit_test REL_TEST_NAME) + get_filename_component(TEST_NAME ${REL_TEST_NAME} NAME_WE) + + set(TEST_NAME "gandiva-precompiled-${TEST_NAME}") + + add_executable(${TEST_NAME} ${REL_TEST_NAME} ${ARGN}) + target_include_directories(${TEST_NAME} PRIVATE ${CMAKE_SOURCE_DIR}/src) + target_link_libraries(${TEST_NAME} + PRIVATE ${ARROW_TEST_LINK_LIBS} ${RE2_LIBRARY} + ) + target_compile_definitions(${TEST_NAME} PRIVATE + GANDIVA_UNIT_TEST=1 + ARROW_STATIC + GANDIVA_STATIC) + set(TEST_PATH "${EXECUTABLE_OUTPUT_PATH}/${TEST_NAME}") + add_test(${TEST_NAME} ${TEST_PATH}) + set_property(TEST ${TEST_NAME} + APPEND PROPERTY + LABELS "unittest;gandiva-tests") + add_dependencies(gandiva-tests ${TEST_NAME}) +endfunction(add_precompiled_unit_test REL_TEST_NAME) + # testing -add_precompiled_unit_test(bitmap_test.cc bitmap.cc) -add_precompiled_unit_test(epoch_time_point_test.cc) -add_precompiled_unit_test(time_test.cc time.cc timestamp_arithmetic.cc ../context_helper.cc) -add_precompiled_unit_test(hash_test.cc hash.cc) -add_precompiled_unit_test(string_ops_test.cc string_ops.cc ../context_helper.cc) -add_precompiled_unit_test(arithmetic_ops_test.cc arithmetic_ops.cc ../context_helper.cc) -add_precompiled_unit_test(extended_math_ops_test.cc extended_math_ops.cc ../context_helper.cc) +if (ARROW_BUILD_TESTS) + add_precompiled_unit_test(bitmap_test.cc bitmap.cc) + add_precompiled_unit_test(epoch_time_point_test.cc) + add_precompiled_unit_test(time_test.cc time.cc timestamp_arithmetic.cc ../context_helper.cc ../cast_time.cc ../../arrow/vendored/datetime/tz.cpp) + add_precompiled_unit_test(hash_test.cc hash.cc) + add_precompiled_unit_test(string_ops_test.cc 
string_ops.cc ../context_helper.cc)
+  add_precompiled_unit_test(arithmetic_ops_test.cc arithmetic_ops.cc ../context_helper.cc)
+  add_precompiled_unit_test(extended_math_ops_test.cc extended_math_ops.cc ../context_helper.cc)
+  add_precompiled_unit_test(decimal_ops_test.cc decimal_ops.cc ../decimal_type_util.cc)
+endif()
diff --git a/cpp/src/gandiva/precompiled/decimal_ops.cc b/cpp/src/gandiva/precompiled/decimal_ops.cc
new file mode 100644
index 0000000000000..99231fe537f7a
--- /dev/null
+++ b/cpp/src/gandiva/precompiled/decimal_ops.cc
@@ -0,0 +1,225 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Algorithms adapted from Apache Impala
+
+#include "gandiva/precompiled/decimal_ops.h"
+
+#include <algorithm>
+
+#include "gandiva/decimal_type_util.h"
+#include "gandiva/logging.h"
+
+namespace gandiva {
+namespace decimalops {
+
+using arrow::BasicDecimal128;
+
+static BasicDecimal128 CheckAndIncreaseScale(const BasicDecimal128& in, int32_t delta) {
+  return (delta <= 0) ? in : in.IncreaseScaleBy(delta);
+}
+
+static BasicDecimal128 CheckAndReduceScale(const BasicDecimal128& in, int32_t delta) {
+  return (delta <= 0) ? in : in.ReduceScaleBy(delta);
+}
+
+/// Adjust x and y to the same scale, and add them.
+static BasicDecimal128 AddFastPath(const BasicDecimalScalar128& x,
+                                   const BasicDecimalScalar128& y, int32_t out_scale) {
+  auto higher_scale = std::max(x.scale(), y.scale());
+
+  auto x_scaled = CheckAndIncreaseScale(x.value(), higher_scale - x.scale());
+  auto y_scaled = CheckAndIncreaseScale(y.value(), higher_scale - y.scale());
+  return x_scaled + y_scaled;
+}
+
+/// Add x and y, caller has ensured there can be no overflow.
+static BasicDecimal128 AddNoOverflow(const BasicDecimalScalar128& x,
+                                     const BasicDecimalScalar128& y, int32_t out_scale) {
+  auto higher_scale = std::max(x.scale(), y.scale());
+  auto sum = AddFastPath(x, y, out_scale);
+  return CheckAndReduceScale(sum, higher_scale - out_scale);
+}
+
+/// Both x_value and y_value must be >= 0
+static BasicDecimal128 AddLargePositive(const BasicDecimalScalar128& x,
+                                        const BasicDecimalScalar128& y,
+                                        int32_t out_scale) {
+  DCHECK_GE(x.value(), 0);
+  DCHECK_GE(y.value(), 0);
+
+  // separate out whole/fractions.
+  BasicDecimal128 x_left, x_right, y_left, y_right;
+  x.value().GetWholeAndFraction(x.scale(), &x_left, &x_right);
+  y.value().GetWholeAndFraction(y.scale(), &y_left, &y_right);
+
+  // Adjust fractional parts to higher scale.
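+  // (Illustration: x = 12.345 (scale 3) and y = 6.78 (scale 2) split into
+  // wholes {12, 6} and fractions {345, 78}; the fractions are brought to the
+  // higher scale 3 as {345, 780}, summed with a carry into the whole part when
+  // they overflow one unit, giving 19.125.)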
+ auto higher_scale = std::max(x.scale(), y.scale()); + auto x_right_scaled = CheckAndIncreaseScale(x_right, higher_scale - x.scale()); + auto y_right_scaled = CheckAndIncreaseScale(y_right, higher_scale - y.scale()); + + BasicDecimal128 right; + BasicDecimal128 carry_to_left; + auto multiplier = BasicDecimal128::GetScaleMultiplier(higher_scale); + if (x_right_scaled >= multiplier - y_right_scaled) { + right = x_right_scaled - (multiplier - y_right_scaled); + carry_to_left = 1; + } else { + right = x_right_scaled + y_right_scaled; + carry_to_left = 0; + } + right = CheckAndReduceScale(right, higher_scale - out_scale); + + auto left = x_left + y_left + carry_to_left; + return (left * BasicDecimal128::GetScaleMultiplier(out_scale)) + right; +} + +/// x_value and y_value cannot be 0, and one must be positive and the other negative. +static BasicDecimal128 AddLargeNegative(const BasicDecimalScalar128& x, + const BasicDecimalScalar128& y, + int32_t out_scale) { + DCHECK_NE(x.value(), 0); + DCHECK_NE(y.value(), 0); + DCHECK((x.value() < 0 && y.value() > 0) || (x.value() > 0 && y.value() < 0)); + + // separate out whole/fractions. + BasicDecimal128 x_left, x_right, y_left, y_right; + x.value().GetWholeAndFraction(x.scale(), &x_left, &x_right); + y.value().GetWholeAndFraction(y.scale(), &y_left, &y_right); + + // Adjust fractional parts to higher scale. + auto higher_scale = std::max(x.scale(), y.scale()); + x_right = CheckAndIncreaseScale(x_right, higher_scale - x.scale()); + y_right = CheckAndIncreaseScale(y_right, higher_scale - y.scale()); + + // Overflow not possible because one is +ve and the other is -ve. + auto left = x_left + y_left; + auto right = x_right + y_right; + + // If the whole and fractional parts have different signs, then we need to make the + // fractional part have the same sign as the whole part. If either left or right is + // zero, then nothing needs to be done. + if (left < 0 && right > 0) { + left += 1; + right -= BasicDecimal128::GetScaleMultiplier(higher_scale); + } else if (left > 0 && right < 0) { + left -= 1; + right += BasicDecimal128::GetScaleMultiplier(higher_scale); + } + right = CheckAndReduceScale(right, higher_scale - out_scale); + return (left * BasicDecimal128::GetScaleMultiplier(out_scale)) + right; +} + +static BasicDecimal128 AddLarge(const BasicDecimalScalar128& x, + const BasicDecimalScalar128& y, int32_t out_scale) { + if (x.value() >= 0 && y.value() >= 0) { + // both positive or 0 + return AddLargePositive(x, y, out_scale); + } else if (x.value() <= 0 && y.value() <= 0) { + // both negative or 0 + BasicDecimalScalar128 x_neg(-x.value(), x.precision(), x.scale()); + BasicDecimalScalar128 y_neg(-y.value(), y.precision(), y.scale()); + return -AddLargePositive(x_neg, y_neg, out_scale); + } else { + // one positive and the other negative + return AddLargeNegative(x, y, out_scale); + } +} + +// Suppose we have a number that requires x bits to be represented and we scale it up by +// 10^scale_by. Let's say now y bits are required to represent it. This function returns +// the maximum possible y - x for a given 'scale_by'. 
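+// e.g. scaling by 10^3 grows the representation by at most
+// floor(log2(10^3)) + 1 = 9 + 1 = 10 bits, since 1000 < 2^10 = 1024.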
+inline int32_t MaxBitsRequiredIncreaseAfterScaling(int32_t scale_by) {
+  // We rely on the following formula:
+  // bits_required(x * 10^y) <= bits_required(x) + floor(log2(10^y)) + 1
+  // We precompute floor(log2(10^x)) + 1 for x = 0, 1, 2...75, 76
+  DCHECK_GE(scale_by, 0);
+  DCHECK_LE(scale_by, 76);
+  static const int32_t floor_log2_plus_one[] = {
+      0,   4,   7,   10,  14,  17,  20,  24,  27,  30,  34,  37,  40,  44,  47,  50,
+      54,  57,  60,  64,  67,  70,  74,  77,  80,  84,  87,  90,  94,  97,  100, 103,
+      107, 110, 113, 117, 120, 123, 127, 130, 133, 137, 140, 143, 147, 150, 153, 157,
+      160, 163, 167, 170, 173, 177, 180, 183, 187, 190, 193, 196, 200, 203, 206, 210,
+      213, 216, 220, 223, 226, 230, 233, 236, 240, 243, 246, 250, 253};
+  return floor_log2_plus_one[scale_by];
+}
+
+// If we have a number with 'num_lz' leading zeros, and we scale it up by 10^scale_by,
+// this function returns the minimum number of leading zeros the result can have.
+inline int32_t MinLeadingZerosAfterScaling(int32_t num_lz, int32_t scale_by) {
+  DCHECK_GE(scale_by, 0);
+  DCHECK_LE(scale_by, 76);
+  int32_t result = num_lz - MaxBitsRequiredIncreaseAfterScaling(scale_by);
+  return result;
+}
+
+// Returns the maximum possible number of bits required to represent num * 10^scale_by.
+inline int32_t MaxBitsRequiredAfterScaling(const BasicDecimalScalar128& num,
+                                           int32_t scale_by) {
+  auto value = num.value();
+  auto value_abs = value.Abs();
+
+  int32_t num_occupied = 128 - value_abs.CountLeadingBinaryZeros();
+  DCHECK_GE(scale_by, 0);
+  DCHECK_LE(scale_by, 76);
+  return num_occupied + MaxBitsRequiredIncreaseAfterScaling(scale_by);
+}
+
+// Returns the minimum number of leading zeros x or y would have after one of them gets
+// scaled up to match the scale of the other one.
+inline int32_t MinLeadingZeros(const BasicDecimalScalar128& x,
+                               const BasicDecimalScalar128& y) {
+  auto x_value = x.value();
+  auto x_value_abs = x_value.Abs();
+
+  auto y_value = y.value();
+  auto y_value_abs = y_value.Abs();
+
+  int32_t x_lz = x_value_abs.CountLeadingBinaryZeros();
+  int32_t y_lz = y_value_abs.CountLeadingBinaryZeros();
+  if (x.scale() < y.scale()) {
+    x_lz = MinLeadingZerosAfterScaling(x_lz, y.scale() - x.scale());
+  } else if (x.scale() > y.scale()) {
+    y_lz = MinLeadingZerosAfterScaling(y_lz, x.scale() - y.scale());
+  }
+  return std::min(x_lz, y_lz);
+}
+
+BasicDecimal128 Add(const BasicDecimalScalar128& x, const BasicDecimalScalar128& y,
+                    int32_t out_precision, int32_t out_scale) {
+  if (out_precision < DecimalTypeUtil::kMaxPrecision) {
+    // fast-path add
+    return AddFastPath(x, y, out_scale);
+  } else {
+    int32_t min_lz = MinLeadingZeros(x, y);
+    if (min_lz >= 3) {
+      // If both numbers have at least MIN_LZ leading zeros, we can add them directly
+      // without the risk of overflow.
+      // We want the result to have at least 2 leading zeros, which ensures that it fits
+      // into the maximum decimal because 2^126 - 1 < 10^38 - 1. If both x and y have at
+      // least 3 leading zeros, then we are guaranteed that the result will have at
+      // least 2 leading zeros.
+      return AddNoOverflow(x, y, out_scale);
+    } else {
+      // slower version: add whole/fraction parts separately, and then combine.
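+      // (e.g. operands close to 10^38 occupy ~127 of the 128 bits, leaving
+      // fewer than 3 leading zeros, so a direct add at the higher scale could
+      // overflow.)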
+      return AddLarge(x, y, out_scale);
+    }
+  }
+}
+
+}  // namespace decimalops
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/precompiled/decimal_ops.h b/cpp/src/gandiva/precompiled/decimal_ops.h
new file mode 100644
index 0000000000000..1e202b88a2515
--- /dev/null
+++ b/cpp/src/gandiva/precompiled/decimal_ops.h
@@ -0,0 +1,34 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+
+#include "gandiva/basic_decimal_scalar.h"
+
+namespace gandiva {
+namespace decimalops {
+
+/// Return the sum of 'x' and 'y'.
+/// out_precision and out_scale are passed along for efficiency, they must match
+/// the rules in DecimalTypeUtil::GetResultType.
+arrow::BasicDecimal128 Add(const BasicDecimalScalar128& x, const BasicDecimalScalar128& y,
+                           int32_t out_precision, int32_t out_scale);
+
+}  // namespace decimalops
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/precompiled/decimal_ops_test.cc b/cpp/src/gandiva/precompiled/decimal_ops_test.cc
new file mode 100644
index 0000000000000..6e58106044753
--- /dev/null
+++ b/cpp/src/gandiva/precompiled/decimal_ops_test.cc
@@ -0,0 +1,77 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
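+
+// In plain terms, the first case below checks (values from the test itself):
+//   decimalops::Add(DecimalScalar128{"201", 30, 3},   // 0.201
+//                   DecimalScalar128{"301", 30, 3},   // 0.301
+//                   31, 3)                            // result precision/scale
+//   -> 502, i.e. 0.502 at (precision 31, scale 3).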
+
+#include <gtest/gtest.h>
+#include <algorithm>
+#include <memory>
+
+#include "arrow/test-util.h"
+#include "gandiva/decimal_scalar.h"
+#include "gandiva/decimal_type_util.h"
+#include "gandiva/precompiled/decimal_ops.h"
+#include "gandiva/precompiled/types.h"
+
+namespace gandiva {
+
+class TestDecimalSql : public ::testing::Test {
+ protected:
+  static void AddAndVerify(const DecimalScalar128& x, const DecimalScalar128& y,
+                           const DecimalScalar128& expected);
+};
+
+#define EXPECT_DECIMAL_EQ(x, y, expected, actual)                            \
+  EXPECT_EQ(expected, actual) << (x).ToString() << " + " << (y).ToString()   \
+                              << " expected : " << expected.ToString()       \
+                              << " actual " << actual.ToString()
+
+void TestDecimalSql::AddAndVerify(const DecimalScalar128& x, const DecimalScalar128& y,
+                                  const DecimalScalar128& expected) {
+  auto t1 = std::make_shared<arrow::Decimal128Type>(x.precision(), x.scale());
+  auto t2 = std::make_shared<arrow::Decimal128Type>(y.precision(), y.scale());
+
+  Decimal128TypePtr out_type;
+  EXPECT_OK(DecimalTypeUtil::GetResultType(DecimalTypeUtil::kOpAdd, {t1, t2}, &out_type));
+
+  auto out_value = decimalops::Add(x, y, out_type->precision(), out_type->scale());
+  EXPECT_DECIMAL_EQ(
+      x, y, expected,
+      DecimalScalar128(out_value, out_type->precision(), out_type->scale()));
+}
+
+TEST_F(TestDecimalSql, Add) {
+  // fast-path
+  AddAndVerify(DecimalScalar128{"201", 30, 3},   // x
+               DecimalScalar128{"301", 30, 3},   // y
+               DecimalScalar128{"502", 31, 3});  // expected
+
+  // max precision
+  AddAndVerify(DecimalScalar128{"09999999999999999999999999999999000000", 38, 5},  // x
+               DecimalScalar128{"100", 38, 7},                                     // y
+               DecimalScalar128{"99999999999999999999999999999990000010", 38, 6});
+
+  // Both -ve
+  AddAndVerify(DecimalScalar128{"-201", 30, 3},    // x
+               DecimalScalar128{"-301", 30, 2},    // y
+               DecimalScalar128{"-3211", 32, 3});  // expected
+
+  // -ve and max precision
+  AddAndVerify(DecimalScalar128{"-09999999999999999999999999999999000000", 38, 5},  // x
+               DecimalScalar128{"-100", 38, 7},                                     // y
+               DecimalScalar128{"-99999999999999999999999999999990000010", 38, 6});
+}
+
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/precompiled/decimal_wrapper.cc b/cpp/src/gandiva/precompiled/decimal_wrapper.cc
new file mode 100644
index 0000000000000..f327a50cce663
--- /dev/null
+++ b/cpp/src/gandiva/precompiled/decimal_wrapper.cc
@@ -0,0 +1,37 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
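+
+// The extern "C" wrapper below is the entry point the generated LLVM IR calls:
+// it reassembles the (high, low) halves into 128-bit decimal scalars, delegates
+// to decimalops::Add, and splits the result back through the out-params.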
+ +#include "gandiva/precompiled/decimal_ops.h" +#include "gandiva/precompiled/types.h" + +extern "C" { + +FORCE_INLINE +void add_large_decimal128_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, int64_t y_high, uint64_t y_low, + int32_t y_precision, int32_t y_scale, + int32_t out_precision, int32_t out_scale, + int64_t* out_high, uint64_t* out_low) { + gandiva::BasicDecimalScalar128 x(x_high, x_low, x_precision, x_scale); + gandiva::BasicDecimalScalar128 y(y_high, y_low, y_precision, y_scale); + + arrow::BasicDecimal128 out = gandiva::decimalops::Add(x, y, out_precision, out_scale); + *out_high = out.high_bits(); + *out_low = out.low_bits(); +} + +} // extern "C" diff --git a/cpp/src/gandiva/precompiled/epoch_time_point.h b/cpp/src/gandiva/precompiled/epoch_time_point.h index dc6340d134e0a..32d6cea731938 100644 --- a/cpp/src/gandiva/precompiled/epoch_time_point.h +++ b/cpp/src/gandiva/precompiled/epoch_time_point.h @@ -19,7 +19,7 @@ #define GANDIVA_EPOCH_TIME_POINT_H // TODO(wesm): IR compilation does not have any include directories set -#include "../../arrow/util/date.h" +#include "../../arrow/vendored/datetime/date.h" // A point of time measured in millis since epoch. class EpochTimePoint { @@ -35,16 +35,18 @@ class EpochTimePoint { int TmMon() const { return static_cast(YearMonthDay().month()) - 1; } int TmYday() const { - auto to_days = date::floor(tp_); - auto first_day_in_year = date::sys_days{YearMonthDay().year() / date::jan / 1}; + auto to_days = arrow::util::date::floor(tp_); + auto first_day_in_year = + arrow::util::date::sys_days{YearMonthDay().year() / arrow::util::date::jan / 1}; return (to_days - first_day_in_year).count(); } int TmMday() const { return static_cast(YearMonthDay().day()); } int TmWday() const { - auto to_days = date::floor(tp_); - return (date::weekday{to_days} - date::Sunday).count(); // NOLINT + auto to_days = arrow::util::date::floor(tp_); + return (arrow::util::date::weekday{to_days} - arrow::util::date::Sunday) // NOLINT + .count(); // NOLINT } int TmHour() const { return static_cast(TimeOfDay().hours().count()); } @@ -57,22 +59,22 @@ class EpochTimePoint { } EpochTimePoint AddYears(int num_years) const { - auto ymd = YearMonthDay() + date::years(num_years); - return EpochTimePoint((date::sys_days{ymd} + // NOLINT + auto ymd = YearMonthDay() + arrow::util::date::years(num_years); + return EpochTimePoint((arrow::util::date::sys_days{ymd} + // NOLINT TimeOfDay().to_duration()) .time_since_epoch()); } EpochTimePoint AddMonths(int num_months) const { - auto ymd = YearMonthDay() + date::months(num_months); - return EpochTimePoint((date::sys_days{ymd} + // NOLINT + auto ymd = YearMonthDay() + arrow::util::date::months(num_months); + return EpochTimePoint((arrow::util::date::sys_days{ymd} + // NOLINT TimeOfDay().to_duration()) .time_since_epoch()); } EpochTimePoint AddDays(int num_days) const { - auto days_since_epoch = date::sys_days{YearMonthDay()} // NOLINT - + date::days(num_days); + auto days_since_epoch = arrow::util::date::sys_days{YearMonthDay()} // NOLINT + + arrow::util::date::days(num_days); return EpochTimePoint( (days_since_epoch + TimeOfDay().to_duration()).time_since_epoch()); } @@ -86,13 +88,16 @@ class EpochTimePoint { int64_t MillisSinceEpoch() const { return tp_.time_since_epoch().count(); } private: - date::year_month_day YearMonthDay() const { - return date::year_month_day{date::floor(tp_)}; // NOLINT + arrow::util::date::year_month_day YearMonthDay() const { + return arrow::util::date::year_month_day{ + 
arrow::util::date::floor(tp_)}; // NOLINT } - date::time_of_day TimeOfDay() const { - auto millis_since_midnight = tp_ - date::floor(tp_); - return date::time_of_day(millis_since_midnight); + arrow::util::date::time_of_day TimeOfDay() const { + auto millis_since_midnight = + tp_ - arrow::util::date::floor(tp_); + return arrow::util::date::time_of_day( + millis_since_midnight); } std::chrono::time_point tp_; diff --git a/cpp/src/gandiva/precompiled/epoch_time_point_test.cc b/cpp/src/gandiva/precompiled/epoch_time_point_test.cc index f489b7d748c64..32cb9e87fe2a8 100644 --- a/cpp/src/gandiva/precompiled/epoch_time_point_test.cc +++ b/cpp/src/gandiva/precompiled/epoch_time_point_test.cc @@ -15,36 +15,39 @@ // specific language governing permissions and limitations // under the License. -#include +#include #include #include "./epoch_time_point.h" +#include "gandiva/precompiled/testing.h" #include "gandiva/precompiled/types.h" -namespace gandiva { +#include "gandiva/date_utils.h" -timestamp StringToTimestamp(const char* buf) { - struct tm tm; - strptime(buf, "%Y-%m-%d %H:%M:%S", &tm); - return timegm(&tm) * 1000; // to millis -} +namespace gandiva { TEST(TestEpochTimePoint, TestTm) { auto ts = StringToTimestamp("2015-05-07 10:20:34"); EpochTimePoint tp(ts); + struct tm* tm_ptr; +#if defined(_MSC_VER) + __time64_t tsec = ts / 1000; + tm_ptr = _gmtime64(&tsec); +#else struct tm tm; time_t tsec = ts / 1000; - gmtime_r(&tsec, &tm); - - EXPECT_EQ(tp.TmYear(), tm.tm_year); - EXPECT_EQ(tp.TmMon(), tm.tm_mon); - EXPECT_EQ(tp.TmYday(), tm.tm_yday); - EXPECT_EQ(tp.TmMday(), tm.tm_mday); - EXPECT_EQ(tp.TmWday(), tm.tm_wday); - EXPECT_EQ(tp.TmHour(), tm.tm_hour); - EXPECT_EQ(tp.TmMin(), tm.tm_min); - EXPECT_EQ(tp.TmSec(), tm.tm_sec); + tm_ptr = gmtime_r(&tsec, &tm); +#endif + + EXPECT_EQ(tp.TmYear(), tm_ptr->tm_year); + EXPECT_EQ(tp.TmMon(), tm_ptr->tm_mon); + EXPECT_EQ(tp.TmYday(), tm_ptr->tm_yday); + EXPECT_EQ(tp.TmMday(), tm_ptr->tm_mday); + EXPECT_EQ(tp.TmWday(), tm_ptr->tm_wday); + EXPECT_EQ(tp.TmHour(), tm_ptr->tm_hour); + EXPECT_EQ(tp.TmMin(), tm_ptr->tm_min); + EXPECT_EQ(tp.TmSec(), tm_ptr->tm_sec); } TEST(TestEpochTimePoint, TestAddYears) { diff --git a/cpp/src/gandiva/precompiled/extended_math_ops.cc b/cpp/src/gandiva/precompiled/extended_math_ops.cc index 1b7642cc3b3e6..b17ccd8e80a84 100644 --- a/cpp/src/gandiva/precompiled/extended_math_ops.cc +++ b/cpp/src/gandiva/precompiled/extended_math_ops.cc @@ -33,30 +33,40 @@ extern "C" { INNER(float64, OUT_TYPE) // Cubic root -#define CBRT(IN_TYPE, OUT_TYPE) \ - FORCE_INLINE \ - OUT_TYPE cbrt_##IN_TYPE(IN_TYPE in) { return static_cast(cbrtl(in)); } +#define CBRT(IN_TYPE, OUT_TYPE) \ + FORCE_INLINE \ + OUT_TYPE cbrt_##IN_TYPE(IN_TYPE in) { \ + return static_cast(cbrtl(static_cast(in))); \ + } ENUMERIC_TYPES_UNARY(CBRT, float64) // Exponent -#define EXP(IN_TYPE, OUT_TYPE) \ - FORCE_INLINE \ - OUT_TYPE exp_##IN_TYPE(IN_TYPE in) { return static_cast(expl(in)); } +#define EXP(IN_TYPE, OUT_TYPE) \ + FORCE_INLINE \ + OUT_TYPE exp_##IN_TYPE(IN_TYPE in) { \ + return static_cast(expl(static_cast(in))); \ + } ENUMERIC_TYPES_UNARY(EXP, float64) // log -#define LOG(IN_TYPE, OUT_TYPE) \ - FORCE_INLINE \ - OUT_TYPE log_##IN_TYPE(IN_TYPE in) { return static_cast(logl(in)); } +#define LOG(IN_TYPE, OUT_TYPE) \ + FORCE_INLINE \ + OUT_TYPE log_##IN_TYPE(IN_TYPE in) { \ + return static_cast(logl(static_cast(in))); \ + } ENUMERIC_TYPES_UNARY(LOG, float64) // log base 10 -#define LOG10(IN_TYPE, OUT_TYPE) \ - FORCE_INLINE \ - OUT_TYPE log10_##IN_TYPE(IN_TYPE in) { return 
static_cast(log10l(in)); } +#define LOG10(IN_TYPE, OUT_TYPE) \ + FORCE_INLINE \ + OUT_TYPE log10_##IN_TYPE(IN_TYPE in) { \ + return static_cast(log10l(static_cast(in))); \ + } + +#define LOGL(VALUE) static_cast(logl(static_cast(VALUE))) ENUMERIC_TYPES_UNARY(LOG10, float64) @@ -74,12 +84,12 @@ void set_error_for_logbase(int64_t execution_context, double base) { #define LOG_WITH_BASE(IN_TYPE1, IN_TYPE2, OUT_TYPE) \ FORCE_INLINE \ OUT_TYPE log_##IN_TYPE1##_##IN_TYPE2(int64 context, IN_TYPE1 base, IN_TYPE2 value) { \ - OUT_TYPE log_of_base = static_cast(logl(base)); \ + OUT_TYPE log_of_base = LOGL(base); \ if (log_of_base == 0) { \ set_error_for_logbase(context, static_cast(base)); \ return 0; \ } \ - return static_cast(logl(value) / logl(base)); \ + return LOGL(value) / LOGL(base); \ } LOG_WITH_BASE(int32, int32, float64) diff --git a/cpp/src/gandiva/precompiled/testing.h b/cpp/src/gandiva/precompiled/testing.h new file mode 100644 index 0000000000000..3214eec6f0494 --- /dev/null +++ b/cpp/src/gandiva/precompiled/testing.h @@ -0,0 +1,37 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include + +#include "arrow/util/logging.h" + +#include "gandiva/date_utils.h" +#include "gandiva/precompiled/types.h" + +namespace gandiva { + +timestamp StringToTimestamp(const char* buf) { + int64_t out = 0; + DCHECK(internal::ParseTimestamp(buf, "%Y-%m-%d %H:%M:%S", false, &out)); + return out * 1000; +} + +} // namespace gandiva diff --git a/cpp/src/gandiva/precompiled/time.cc b/cpp/src/gandiva/precompiled/time.cc index 2ea7cd4eed032..22c7cbf33568f 100644 --- a/cpp/src/gandiva/precompiled/time.cc +++ b/cpp/src/gandiva/precompiled/time.cc @@ -24,6 +24,7 @@ extern "C" { #include #include "./time_constants.h" +#include "./time_fields.h" #include "./types.h" #define MINS_IN_HOUR 60 @@ -518,6 +519,12 @@ void set_error_for_date(int32 length, const char* input, const char* msg, } date64 castDATE_utf8(int64_t context, const char* input, int32 length) { + using arrow::util::date::day; + using arrow::util::date::month; + using arrow::util::date::sys_days; + using arrow::util::date::year; + using arrow::util::date::year_month_day; + using gandiva::TimeFields; // format : 0 is year, 1 is month and 2 is day. 
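  // e.g. "71-12-30" fills dateFields with {71, 12, 30}; the two-digit year is
  // widened to 1971 by the century adjustment further below.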
  int dateFields[3];
  int dateIndex = 0, index = 0, value = 0;
@@ -546,21 +553,129 @@ date64 castDATE_utf8(int64_t context, const char* input, int32 length) {
    * If range of two digits is between 70 - 99 then year = 1970 - 1999
    * Else if two digits is between 00 - 69 = 2000 - 2069
    */
-  if (dateFields[0] < 100) {
-    if (dateFields[0] < 70) {
-      dateFields[0] += 2000;
+  if (dateFields[TimeFields::kYear] < 100) {
+    if (dateFields[TimeFields::kYear] < 70) {
+      dateFields[TimeFields::kYear] += 2000;
     } else {
-      dateFields[0] += 1900;
+      dateFields[TimeFields::kYear] += 1900;
     }
   }
-  date::year_month_day day =
-      date::year(dateFields[0]) / date::month(dateFields[1]) / date::day(dateFields[2]);
-  if (!day.ok()) {
+  year_month_day date = year(dateFields[TimeFields::kYear]) /
+                        month(dateFields[TimeFields::kMonth]) /
+                        day(dateFields[TimeFields::kDay]);
+  if (!date.ok()) {
     set_error_for_date(length, input, msg, context);
     return 0;
   }
-  return std::chrono::time_point_cast<std::chrono::milliseconds>(date::sys_days(day))
+  return std::chrono::time_point_cast<std::chrono::milliseconds>(sys_days(date))
       .time_since_epoch()
       .count();
 }
+
+/*
+ * Input consists of mandatory and optional fields.
+ * Mandatory fields are year, month and day.
+ * Optional fields are time, displacement and zone.
+ * Format is <year-month-day>[ hours:minutes:seconds][.millis][ displacement|zone]
+ */
+timestamp castTIMESTAMP_utf8(int64_t context, const char* input, int32 length) {
+  using arrow::util::date::day;
+  using arrow::util::date::month;
+  using arrow::util::date::sys_days;
+  using arrow::util::date::year;
+  using arrow::util::date::year_month_day;
+  using gandiva::TimeFields;
+  using std::chrono::hours;
+  using std::chrono::milliseconds;
+  using std::chrono::minutes;
+  using std::chrono::seconds;
+
+  int ts_fields[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
+  boolean add_displacement = true;
+  boolean encountered_zone = false;
+  int ts_field_index = TimeFields::kYear, index = 0, value = 0;
+  while (ts_field_index < TimeFields::kMax && index < length) {
+    if (isdigit(input[index])) {
+      value = (value * 10) + (input[index] - '0');
+    } else {
+      ts_fields[ts_field_index] = value;
+      value = 0;
+
+      switch (input[index]) {
+        case '.':
+        case ':':
+        case ' ':
+          ts_field_index++;
+          break;
+        case '+':
+          // +08:00 means the time zone is 8 hours ahead. Need to subtract.
+          add_displacement = false;
+          ts_field_index = TimeFields::kDisplacementHours;
+          break;
+        case '-':
+          // Overloaded as date separator and negative displacement.
+          ts_field_index = (ts_field_index < 3) ?
(ts_field_index + 1) + : TimeFields::kDisplacementHours; + break; + default: + encountered_zone = true; + break; + } + } + if (encountered_zone) { + break; + } + index++; + } + + // Store the last value + if (ts_field_index < TimeFields::kMax) { + ts_fields[ts_field_index++] = value; + } + + // adjust the year + if (ts_fields[TimeFields::kYear] < 100) { + if (ts_fields[TimeFields::kYear] < 70) { + ts_fields[TimeFields::kYear] += 2000; + } else { + ts_fields[TimeFields::kYear] += 1900; + } + } + + // handle timezone + if (encountered_zone) { + int err = 0; + timestamp ret_time = 0; + err = gdv_fn_time_with_zone(&ts_fields[0], (input + index), (length - index), + &ret_time); + if (err) { + const char* msg = "Invalid timestamp or unknown zone for timestamp value "; + set_error_for_date(length, input, msg, context); + return 0; + } + return ret_time; + } + + year_month_day date = year(ts_fields[TimeFields::kYear]) / + month(ts_fields[TimeFields::kMonth]) / + day(ts_fields[TimeFields::kDay]); + if (!date.ok()) { + const char* msg = "Not a valid day for timestamp value "; + set_error_for_date(length, input, msg, context); + return 0; + } + + auto date_time = sys_days(date) + hours(ts_fields[TimeFields::kHours]) + + minutes(ts_fields[TimeFields::kMinutes]) + + seconds(ts_fields[TimeFields::kSeconds]) + + milliseconds(ts_fields[TimeFields::kSubSeconds]); + if (ts_fields[TimeFields::kDisplacementHours] || + ts_fields[TimeFields::kDisplacementMinutes]) { + auto displacement_time = hours(ts_fields[TimeFields::kDisplacementHours]) + + minutes(ts_fields[TimeFields::kDisplacementMinutes]); + date_time = (add_displacement) ? (date_time + displacement_time) + : (date_time - displacement_time); + } + return std::chrono::time_point_cast(date_time).time_since_epoch().count(); +} } // extern "C" diff --git a/cpp/src/gandiva/precompiled/time_fields.h b/cpp/src/gandiva/precompiled/time_fields.h new file mode 100644 index 0000000000000..7131d5c8232b8 --- /dev/null +++ b/cpp/src/gandiva/precompiled/time_fields.h @@ -0,0 +1,37 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef GANDIVA_TIME_FIELDS_H +#define GANDIVA_TIME_FIELDS_H + +namespace gandiva { + +enum TimeFields { + kYear, + kMonth, + kDay, + kHours, + kMinutes, + kSeconds, + kSubSeconds, + kDisplacementHours, + kDisplacementMinutes, + kMax +}; + +} // namespace gandiva +#endif // GANDIVA_TIME_FIELDS_H diff --git a/cpp/src/gandiva/precompiled/time_test.cc b/cpp/src/gandiva/precompiled/time_test.cc index b8ee4dc4fbef3..b8f80698200c0 100644 --- a/cpp/src/gandiva/precompiled/time_test.cc +++ b/cpp/src/gandiva/precompiled/time_test.cc @@ -15,20 +15,14 @@ // specific language governing permissions and limitations // under the License. 
-#include - #include +#include #include "../execution_context.h" +#include "gandiva/precompiled/testing.h" #include "gandiva/precompiled/types.h" namespace gandiva { -timestamp StringToTimestamp(const char* buf) { - struct tm tm; - strptime(buf, "%Y-%m-%d %H:%M:%S", &tm); - return timegm(&tm) * 1000; // to millis -} - TEST(TestTime, TestCastDate) { ExecutionContext context; int64_t context_ptr = reinterpret_cast(&context); @@ -50,6 +44,55 @@ TEST(TestTime, TestCastDate) { EXPECT_EQ(castDATE_utf8(context_ptr, "71-12-XX", 8), 0); } +TEST(TestTime, TestCastTimestamp) { + ExecutionContext context; + int64_t context_ptr = reinterpret_cast(&context); + + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "1967-12-1", 9), -65836800000); + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "1972-12-1", 9), 92016000000); + EXPECT_EQ(castDATE_utf8(context_ptr, "67-12-1", 7), 3089923200000); + EXPECT_EQ(castDATE_utf8(context_ptr, "67-1-1", 7), 3061065600000); + EXPECT_EQ(castDATE_utf8(context_ptr, "71-1-1", 7), 31536000000); + + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30", 18), 969702330000); + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30.920", 22), 969702330920); + + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30.920 +08:00", 29), + 969673530920); + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30.920 -11:45", 29), + 969744630920); + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "65-03-04 00:20:40.920 +00:30", 28), + 3003349840920); + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "1932-05-18 11:30:00.920 +11:30", 30), + -1187308799080); + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "1857-02-11 20:31:40.920 -05:30", 30), + -3562264699080); + + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30.920 Canada/Pacific", 37), + 969727530920); + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2012-02-28 23:30:59 Asia/Kolkata", 32), + 1330452059000); + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "1923-10-07 03:03:03 America/New_York", 36), + -1459094217000); + + // error cases + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "20000923", 8), 0); + EXPECT_EQ(context.get_error(), "Not a valid day for timestamp value 20000923"); + context.Reset(); + + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-2b", 10), 0); + EXPECT_EQ(context.get_error(), + "Invalid timestamp or unknown zone for timestamp value 2000-09-2b"); + context.Reset(); + + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30.920 Unknown/Zone", 35), + 0); + EXPECT_EQ(context.get_error(), + "Invalid timestamp or unknown zone for timestamp value 2000-09-23 " + "9:45:30.920 Unknown/Zone"); + context.Reset(); +} + TEST(TestTime, TestExtractTime) { // 10:20:33 int32 time_as_millis_in_day = 37233000; diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h index 9c574ba6e3f0a..7f3b0a09b43ce 100644 --- a/cpp/src/gandiva/precompiled/types.h +++ b/cpp/src/gandiva/precompiled/types.h @@ -157,6 +157,8 @@ int32 utf8_length(int64 context, const char* data, int32 data_len); date64 castDATE_utf8(int64_t execution_context, const char* input, int32 length); +timestamp castTIMESTAMP_utf8(int64_t execution_context, const char* input, int32 length); + } // extern "C" #endif // PRECOMPILED_TYPES_H diff --git a/cpp/src/gandiva/projector.cc b/cpp/src/gandiva/projector.cc index 8020a45b3d302..7950fc7b8d149 100644 --- a/cpp/src/gandiva/projector.cc +++ b/cpp/src/gandiva/projector.cc @@ -36,6 +36,8 @@ Projector::Projector(std::unique_ptr llvm_generator, SchemaPtr sc output_fields_(output_fields), 
configuration_(configuration) {} +Projector::~Projector() {} + Status Projector::Make(SchemaPtr schema, const ExpressionVector& exprs, std::shared_ptr* projector) { return Projector::Make(schema, exprs, ConfigurationBuilder::DefaultConfiguration(), @@ -45,12 +47,10 @@ Status Projector::Make(SchemaPtr schema, const ExpressionVector& exprs, Status Projector::Make(SchemaPtr schema, const ExpressionVector& exprs, std::shared_ptr configuration, std::shared_ptr* projector) { - ARROW_RETURN_FAILURE_IF_FALSE(schema != nullptr, - Status::Invalid("schema cannot be null")); - ARROW_RETURN_FAILURE_IF_FALSE(!exprs.empty(), - Status::Invalid("expressions need to be non-empty")); - ARROW_RETURN_FAILURE_IF_FALSE(configuration != nullptr, - Status::Invalid("configuration cannot be null")); + ARROW_RETURN_IF(schema == nullptr, Status::Invalid("Schema cannot be null")); + ARROW_RETURN_IF(exprs.empty(), Status::Invalid("Expressions cannot be empty")); + ARROW_RETURN_IF(configuration == nullptr, + Status::Invalid("Configuration cannot be null")); // see if equivalent projector was already built static Cache> cache; @@ -63,23 +63,21 @@ Status Projector::Make(SchemaPtr schema, const ExpressionVector& exprs, // Build LLVM generator, and generate code for the specified expressions std::unique_ptr llvm_gen; - Status status = LLVMGenerator::Make(configuration, &llvm_gen); - ARROW_RETURN_NOT_OK(status); + ARROW_RETURN_NOT_OK(LLVMGenerator::Make(configuration, &llvm_gen)); // Run the validation on the expressions. // Return if any of the expression is invalid since // we will not be able to process further. ExprValidator expr_validator(llvm_gen->types(), schema); for (auto& expr : exprs) { - status = expr_validator.Validate(expr); - ARROW_RETURN_NOT_OK(status); + ARROW_RETURN_NOT_OK(expr_validator.Validate(expr)); } - status = llvm_gen->Build(exprs); - ARROW_RETURN_NOT_OK(status); + ARROW_RETURN_NOT_OK(llvm_gen->Build(exprs)); // save the output field types. Used for validation at Evaluate() time. 
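  // For instance (hypothetical exprs), {a + b, a < b} over int32 fields records
  // fields of type int32 and bool here; Evaluate() later validates the
  // caller-supplied output buffers against them.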
std::vector output_fields; + output_fields.reserve(exprs.size()); for (auto& expr : exprs) { output_fields.push_back(expr->result()); } @@ -94,133 +92,109 @@ Status Projector::Make(SchemaPtr schema, const ExpressionVector& exprs, Status Projector::Evaluate(const arrow::RecordBatch& batch, const ArrayDataVector& output_data_vecs) { - Status status = ValidateEvaluateArgsCommon(batch); - ARROW_RETURN_NOT_OK(status); - - if (output_data_vecs.size() != output_fields_.size()) { - std::stringstream ss; - ss << "number of buffers for output_data_vecs is " << output_data_vecs.size() - << ", expected " << output_fields_.size(); - return Status::Invalid(ss.str()); - } + ARROW_RETURN_NOT_OK(ValidateEvaluateArgsCommon(batch)); + ARROW_RETURN_IF( + output_data_vecs.size() != output_fields_.size(), + Status::Invalid("Number of output buffers must match number of fields")); int idx = 0; for (auto& array_data : output_data_vecs) { + const auto output_field = output_fields_[idx]; if (array_data == nullptr) { - std::stringstream ss; - ss << "array for output field " << output_fields_[idx]->name() << "is null."; - return Status::Invalid(ss.str()); + return Status::Invalid("Output array for field ", output_field->name(), + " should not be null"); } - Status status = - ValidateArrayDataCapacity(*array_data, *(output_fields_[idx]), batch.num_rows()); - ARROW_RETURN_NOT_OK(status); + ARROW_RETURN_NOT_OK( + ValidateArrayDataCapacity(*array_data, *output_field, batch.num_rows())); ++idx; } + return llvm_generator_->Execute(batch, output_data_vecs); } Status Projector::Evaluate(const arrow::RecordBatch& batch, arrow::MemoryPool* pool, arrow::ArrayVector* output) { - Status status = ValidateEvaluateArgsCommon(batch); - ARROW_RETURN_NOT_OK(status); - - if (output == nullptr) { - return Status::Invalid("output must be non-null."); - } - - if (pool == nullptr) { - return Status::Invalid("memory pool must be non-null."); - } + ARROW_RETURN_NOT_OK(ValidateEvaluateArgsCommon(batch)); + ARROW_RETURN_IF(output == nullptr, Status::Invalid("Output must be non-null.")); + ARROW_RETURN_IF(pool == nullptr, Status::Invalid("Memory pool must be non-null.")); // Allocate the output data vecs. ArrayDataVector output_data_vecs; + output_data_vecs.reserve(output_fields_.size()); for (auto& field : output_fields_) { ArrayDataPtr output_data; - status = AllocArrayData(field->type(), batch.num_rows(), pool, &output_data); - ARROW_RETURN_NOT_OK(status); - + ARROW_RETURN_NOT_OK( + AllocArrayData(field->type(), batch.num_rows(), pool, &output_data)); output_data_vecs.push_back(output_data); } // Execute the expression(s). - status = llvm_generator_->Execute(batch, output_data_vecs); - ARROW_RETURN_NOT_OK(status); + ARROW_RETURN_NOT_OK(llvm_generator_->Execute(batch, output_data_vecs)); // Create and return array arrays. 
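  // (Each entry output[i] corresponds to output_fields_[i] and holds
  // batch.num_rows() values.)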
  output->clear();
   for (auto& array_data : output_data_vecs) {
     output->push_back(arrow::MakeArray(array_data));
   }
+
   return Status::OK();
 }
 
 // TODO : handle variable-len vectors
 Status Projector::AllocArrayData(const DataTypePtr& type, int64_t num_records,
                                  arrow::MemoryPool* pool, ArrayDataPtr* array_data) {
-  if (!arrow::is_primitive(type->id())) {
-    return Status::Invalid("Unsupported output data type " + type->ToString());
-  }
+  const auto* fw_type = dynamic_cast<const arrow::FixedWidthType*>(type.get());
+  ARROW_RETURN_IF(fw_type == nullptr,
+                  Status::Invalid("Unsupported output data type ", type));
 
-  arrow::Status astatus;
   std::shared_ptr<arrow::Buffer> null_bitmap;
-  int64_t size = arrow::BitUtil::BytesForBits(num_records);
-  astatus = arrow::AllocateBuffer(pool, size, &null_bitmap);
-  ARROW_RETURN_NOT_OK(astatus);
+  int64_t bitmap_bytes = arrow::BitUtil::BytesForBits(num_records);
+  ARROW_RETURN_NOT_OK(arrow::AllocateBuffer(pool, bitmap_bytes, &null_bitmap));
 
   std::shared_ptr<arrow::Buffer> data;
-  const auto& fw_type = dynamic_cast<const arrow::FixedWidthType&>(*type);
-  int64_t data_len = arrow::BitUtil::BytesForBits(num_records * fw_type.bit_width());
-  astatus = arrow::AllocateBuffer(pool, data_len, &data);
-  ARROW_RETURN_NOT_OK(astatus);
+  int64_t data_len = arrow::BitUtil::BytesForBits(num_records * fw_type->bit_width());
+  ARROW_RETURN_NOT_OK(arrow::AllocateBuffer(pool, data_len, &data));
+
+  // This is not strictly required but valgrind gets confused and detects this
+  // as uninitialized memory access. See arrow::util::SetBitTo().
+  if (type->id() == arrow::Type::BOOL) {
+    memset(data->mutable_data(), 0, data_len);
+  }
 
   *array_data = arrow::ArrayData::Make(type, num_records, {null_bitmap, data});
   return Status::OK();
 }
 
 Status Projector::ValidateEvaluateArgsCommon(const arrow::RecordBatch& batch) {
-  if (!batch.schema()->Equals(*schema_)) {
-    return Status::Invalid("Schema in RecordBatch must match the schema in Make()");
-  }
-  if (batch.num_rows() == 0) {
-    return Status::Invalid("RecordBatch must be non-empty.");
-  }
+  ARROW_RETURN_IF(!batch.schema()->Equals(*schema_),
+                  Status::Invalid("Schema in RecordBatch must match schema in Make()"));
+  ARROW_RETURN_IF(batch.num_rows() == 0,
+                  Status::Invalid("RecordBatch must be non-empty."));
+
   return Status::OK();
 }
 
 Status Projector::ValidateArrayDataCapacity(const arrow::ArrayData& array_data,
                                             const arrow::Field& field,
                                             int64_t num_records) {
-  // verify that there are at least two buffers (validity and data).
-  if (array_data.buffers.size() < 2) {
-    std::stringstream ss;
-    ss << "number of buffers for output field " << field.name() << " is "
-       << array_data.buffers.size() << ", must have minimum 2.";
-    return Status::Invalid(ss.str());
-  }
+  ARROW_RETURN_IF(array_data.buffers.size() < 2,
+                  Status::Invalid("ArrayData must have at least 2 buffers"));
 
-  // verify size of bitmap buffer.
   int64_t min_bitmap_len = arrow::BitUtil::BytesForBits(num_records);
   int64_t bitmap_len = array_data.buffers[0]->capacity();
-  if (bitmap_len < min_bitmap_len) {
-    std::stringstream ss;
-    ss << "bitmap buffer for output field " << field.name() << " has size " << bitmap_len
-       << ", must have minimum size " << min_bitmap_len;
-    return Status::Invalid(ss.str());
-  }
+  ARROW_RETURN_IF(bitmap_len < min_bitmap_len,
+                  Status::Invalid("Bitmap buffer too small for ", field.name()));
 
   // verify size of data buffer.
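  // e.g. 100 int32 records need BytesForBits(100 * 32) = 400 bytes, so a data
  // buffer whose capacity() is smaller than that is rejected below.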
// TODO : handle variable-len vectors const auto& fw_type = dynamic_cast(*field.type()); int64_t min_data_len = arrow::BitUtil::BytesForBits(num_records * fw_type.bit_width()); int64_t data_len = array_data.buffers[1]->capacity(); - if (data_len < min_data_len) { - std::stringstream ss; - ss << "data buffer for output field " << field.name() << " has size " << data_len - << ", must have minimum size " << min_data_len; - return Status::Invalid(ss.str()); - } + ARROW_RETURN_IF(data_len < min_data_len, + Status::Invalid("Data buffer too small for ", field.name())); + return Status::OK(); } diff --git a/cpp/src/gandiva/projector.h b/cpp/src/gandiva/projector.h index c9d727164735e..58bac78a4068c 100644 --- a/cpp/src/gandiva/projector.h +++ b/cpp/src/gandiva/projector.h @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -#ifndef GANDIVA_EXPR_PROJECTOR_H -#define GANDIVA_EXPR_PROJECTOR_H +#pragma once #include #include @@ -28,6 +27,7 @@ #include "gandiva/arrow.h" #include "gandiva/configuration.h" #include "gandiva/expression.h" +#include "gandiva/visibility.h" namespace gandiva { @@ -37,8 +37,12 @@ class LLVMGenerator; /// /// A projector is built for a specific schema and vector of expressions. /// Once the projector is built, it can be used to evaluate many row batches. -class Projector { +class GANDIVA_EXPORT Projector { public: + // Inline dtor will attempt to resolve the destructor for + // LLVMGenerator on MSVC, so we compile the dtor in the object code + ~Projector(); + /// Build a default projector for the given schema to evaluate /// the vector of expressions. /// @@ -99,5 +103,3 @@ class Projector { }; } // namespace gandiva - -#endif // GANDIVA_EXPR_PROJECTOR_H diff --git a/cpp/src/gandiva/projector_cache_key.h b/cpp/src/gandiva/projector_cache_key.h index e5839163b4d18..26da5288e5d15 100644 --- a/cpp/src/gandiva/projector_cache_key.h +++ b/cpp/src/gandiva/projector_cache_key.h @@ -41,7 +41,7 @@ class ProjectorCacheKey { boost::hash_combine(result, expr_as_string); UpdateUniqifier(expr_as_string); } - boost::hash_combine(result, configuration); + boost::hash_combine(result, configuration->Hash()); boost::hash_combine(result, schema_->ToString()); boost::hash_combine(result, uniqifier_); hash_code_ = result; @@ -55,7 +55,7 @@ class ProjectorCacheKey { return false; } - if (configuration_ != other.configuration_) { + if (*configuration_ != *other.configuration_) { return false; } diff --git a/cpp/src/gandiva/proto/Types.proto b/cpp/src/gandiva/proto/Types.proto index ac19d0f1c1919..7474065f68b73 100644 --- a/cpp/src/gandiva/proto/Types.proto +++ b/cpp/src/gandiva/proto/Types.proto @@ -146,6 +146,13 @@ message BinaryNode { optional bytes value = 1; } +message DecimalNode { + optional string value = 1; + optional int32 precision = 2; + optional int32 scale = 3; +} + + message TreeNode { optional FieldNode fieldNode = 1; optional FunctionNode fnNode = 2; @@ -164,6 +171,7 @@ message TreeNode { optional DoubleNode doubleNode = 16; optional StringNode stringNode = 17; optional BinaryNode binaryNode = 18; + optional DecimalNode decimalNode = 19; } message ExpressionRoot { diff --git a/cpp/src/gandiva/regex_util.cc b/cpp/src/gandiva/regex_util.cc index 893af095a3dd2..abdd579d1f5e4 100644 --- a/cpp/src/gandiva/regex_util.cc +++ b/cpp/src/gandiva/regex_util.cc @@ -20,7 +20,7 @@ namespace gandiva { const std::set RegexUtil::pcre_regex_specials_ = { - '[', ']', '(', ')', '|', '^', '-', '+', '*', '?', '{', '}', '$', '\\'}; + '[', ']', '(', ')', '|', 
'^', '-', '+', '*', '?', '{', '}', '$', '\\', '.'}; Status RegexUtil::SqlLikePatternToPcre(const std::string& sql_pattern, char escape_char, std::string& pcre_pattern) { @@ -38,20 +38,16 @@ Status RegexUtil::SqlLikePatternToPcre(const std::string& sql_pattern, char esca if (cur == escape_char) { // escape char must be followed by '_', '%' or the escape char itself. ++idx; - if (idx == sql_pattern.size()) { - std::stringstream msg; - msg << "unexpected escape char at the end of pattern " << sql_pattern; - return Status::Invalid(msg.str()); - } + ARROW_RETURN_IF( + idx == sql_pattern.size(), + Status::Invalid("Unexpected escape char at the end of pattern ", sql_pattern)); cur = sql_pattern.at(idx); if (cur == '_' || cur == '%' || cur == escape_char) { pcre_pattern += cur; } else { - std::stringstream msg; - msg << "invalid escape sequence in pattern " << sql_pattern << " at offset " - << idx; - return Status::Invalid(msg.str()); + return Status::Invalid("Invalid escape sequence in pattern ", sql_pattern, + " at offset ", idx); } } else if (cur == '_') { pcre_pattern += '.'; diff --git a/cpp/src/gandiva/regex_util.h b/cpp/src/gandiva/regex_util.h index 6a22af2b9cac3..7ea7060c483f8 100644 --- a/cpp/src/gandiva/regex_util.h +++ b/cpp/src/gandiva/regex_util.h @@ -23,11 +23,12 @@ #include #include "gandiva/arrow.h" +#include "gandiva/visibility.h" namespace gandiva { /// \brief Utility class for converting sql patterns to pcre patterns. -class RegexUtil { +class GANDIVA_EXPORT RegexUtil { public: // Convert an sql pattern to a pcre pattern static Status SqlLikePatternToPcre(const std::string& like_pattern, char escape_char, diff --git a/cpp/src/gandiva/selection_vector.cc b/cpp/src/gandiva/selection_vector.cc index 9266ca7fe1056..e643cece8a2c3 100644 --- a/cpp/src/gandiva/selection_vector.cc +++ b/cpp/src/gandiva/selection_vector.cc @@ -22,28 +22,23 @@ #include #include +#include "arrow/util/bit-util.h" + #include "gandiva/selection_vector_impl.h" namespace gandiva { Status SelectionVector::PopulateFromBitMap(const uint8_t* bitmap, int64_t bitmap_size, int64_t max_bitmap_index) { - if (bitmap_size % 8 != 0) { - std::stringstream ss; - ss << "bitmap size " << bitmap_size << " must be padded to 64-bit size"; - return Status::Invalid(ss.str()); - } - if (max_bitmap_index < 0) { - std::stringstream ss; - ss << "max bitmap index " << max_bitmap_index << " must be positive"; - return Status::Invalid(ss.str()); - } - if (static_cast(max_bitmap_index) > GetMaxSupportedValue()) { - std::stringstream ss; - ss << "max_bitmap_index " << max_bitmap_index << " must be <= maxSupportedValue " - << GetMaxSupportedValue() << " in selection vector"; - return Status::Invalid(ss.str()); - } + const uint64_t max_idx = static_cast(max_bitmap_index); + ARROW_RETURN_IF(bitmap_size % 8, Status::Invalid("Bitmap size ", bitmap_size, + " must be aligned to 64-bit size")); + ARROW_RETURN_IF(max_bitmap_index < 0, + Status::Invalid("Max bitmap index must be positive")); + ARROW_RETURN_IF( + max_idx > GetMaxSupportedValue(), + Status::Invalid("max_bitmap_index ", max_idx, " must be <= maxSupportedValue ", + GetMaxSupportedValue(), " in selection vector")); int64_t max_slots = GetMaxSlots(); @@ -55,8 +50,18 @@ Status SelectionVector::PopulateFromBitMap(const uint8_t* bitmap, int64_t bitmap uint64_t current_word = bitmap_64[bitmap_idx]; while (current_word != 0) { +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4146) +#endif + // MSVC warns about negating an unsigned type. 
We suppress it for now uint64_t highest_only = current_word & -current_word; - int pos_in_word = __builtin_ctzl(highest_only); + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + + int pos_in_word = arrow::BitUtil::CountTrailingZeros(highest_only); int64_t pos_in_bitmap = bitmap_idx * 64 + pos_in_word; if (pos_in_bitmap > max_bitmap_index) { @@ -64,9 +69,9 @@ Status SelectionVector::PopulateFromBitMap(const uint8_t* bitmap, int64_t bitmap break; } - if (selection_idx >= max_slots) { - return Status::Invalid("selection vector has no remaining slots"); - } + ARROW_RETURN_IF(selection_idx >= max_slots, + Status::Invalid("selection vector has no remaining slots")); + SetIndex(selection_idx, pos_in_bitmap); ++selection_idx; @@ -81,60 +86,54 @@ Status SelectionVector::PopulateFromBitMap(const uint8_t* bitmap, int64_t bitmap Status SelectionVector::MakeInt16(int64_t max_slots, std::shared_ptr buffer, std::shared_ptr* selection_vector) { - auto status = SelectionVectorInt16::ValidateBuffer(max_slots, buffer); - ARROW_RETURN_NOT_OK(status); - + ARROW_RETURN_NOT_OK(SelectionVectorInt16::ValidateBuffer(max_slots, buffer)); *selection_vector = std::make_shared(max_slots, buffer); + return Status::OK(); } Status SelectionVector::MakeInt16(int64_t max_slots, arrow::MemoryPool* pool, std::shared_ptr* selection_vector) { std::shared_ptr buffer; - auto status = SelectionVectorInt16::AllocateBuffer(max_slots, pool, &buffer); - ARROW_RETURN_NOT_OK(status); - + ARROW_RETURN_NOT_OK(SelectionVectorInt16::AllocateBuffer(max_slots, pool, &buffer)); *selection_vector = std::make_shared(max_slots, buffer); + return Status::OK(); } Status SelectionVector::MakeInt32(int64_t max_slots, std::shared_ptr buffer, std::shared_ptr* selection_vector) { - auto status = SelectionVectorInt32::ValidateBuffer(max_slots, buffer); - ARROW_RETURN_NOT_OK(status); - + ARROW_RETURN_NOT_OK(SelectionVectorInt32::ValidateBuffer(max_slots, buffer)); *selection_vector = std::make_shared(max_slots, buffer); + return Status::OK(); } Status SelectionVector::MakeInt32(int64_t max_slots, arrow::MemoryPool* pool, std::shared_ptr* selection_vector) { std::shared_ptr buffer; - auto status = SelectionVectorInt32::AllocateBuffer(max_slots, pool, &buffer); - ARROW_RETURN_NOT_OK(status); - + ARROW_RETURN_NOT_OK(SelectionVectorInt32::AllocateBuffer(max_slots, pool, &buffer)); *selection_vector = std::make_shared(max_slots, buffer); + return Status::OK(); } Status SelectionVector::MakeInt64(int64_t max_slots, std::shared_ptr buffer, std::shared_ptr* selection_vector) { - auto status = SelectionVectorInt64::ValidateBuffer(max_slots, buffer); - ARROW_RETURN_NOT_OK(status); - + ARROW_RETURN_NOT_OK(SelectionVectorInt64::ValidateBuffer(max_slots, buffer)); *selection_vector = std::make_shared(max_slots, buffer); + return Status::OK(); } Status SelectionVector::MakeInt64(int64_t max_slots, arrow::MemoryPool* pool, std::shared_ptr* selection_vector) { std::shared_ptr buffer; - auto status = SelectionVectorInt64::AllocateBuffer(max_slots, pool, &buffer); - ARROW_RETURN_NOT_OK(status); - + ARROW_RETURN_NOT_OK(SelectionVectorInt64::AllocateBuffer(max_slots, pool, &buffer)); *selection_vector = std::make_shared(max_slots, buffer); + return Status::OK(); } @@ -142,8 +141,7 @@ template Status SelectionVectorImpl::AllocateBuffer( int64_t max_slots, arrow::MemoryPool* pool, std::shared_ptr* buffer) { auto buffer_len = max_slots * sizeof(C_TYPE); - auto astatus = arrow::AllocateBuffer(pool, buffer_len, buffer); - ARROW_RETURN_NOT_OK(astatus); + 
ARROW_RETURN_NOT_OK(arrow::AllocateBuffer(pool, buffer_len, buffer)); return Status::OK(); } @@ -151,19 +149,13 @@ Status SelectionVectorImpl::AllocateBuffer( template Status SelectionVectorImpl::ValidateBuffer( int64_t max_slots, std::shared_ptr buffer) { - // verify buffer is mutable - if (!buffer->is_mutable()) { - return Status::Invalid("buffer for selection vector must be mutable"); - } + ARROW_RETURN_IF(!buffer->is_mutable(), + Status::Invalid("buffer for selection vector must be mutable")); + + const int64_t min_len = max_slots * sizeof(C_TYPE); + ARROW_RETURN_IF(buffer->size() < min_len, + Status::Invalid("Buffer for selection vector is too small")); - // verify size of buffer. - int64_t min_len = max_slots * sizeof(C_TYPE); - if (buffer->size() < min_len) { - std::stringstream ss; - ss << "buffer for selection_data has size " << buffer->size() - << ", must have minimum size " << min_len; - return Status::Invalid(ss.str()); - } return Status::OK(); } diff --git a/cpp/src/gandiva/selection_vector.h b/cpp/src/gandiva/selection_vector.h index dcd2f6bbb7f4c..2e9941781d01e 100644 --- a/cpp/src/gandiva/selection_vector.h +++ b/cpp/src/gandiva/selection_vector.h @@ -24,12 +24,13 @@ #include "gandiva/arrow.h" #include "gandiva/logging.h" +#include "gandiva/visibility.h" namespace gandiva { /// \brief Selection Vector : vector of indices in a row-batch for a selection, /// backed by an arrow-array. -class SelectionVector { +class GANDIVA_EXPORT SelectionVector { public: virtual ~SelectionVector() = default; diff --git a/cpp/src/gandiva/selection_vector_test.cc b/cpp/src/gandiva/selection_vector_test.cc index acb0f338cd6ae..67389273c82f2 100644 --- a/cpp/src/gandiva/selection_vector_test.cc +++ b/cpp/src/gandiva/selection_vector_test.cc @@ -18,6 +18,7 @@ #include "gandiva/selection_vector.h" #include +#include #include @@ -102,15 +103,14 @@ TEST_F(TestSelectionVector, TestInt16PopulateFromBitMap) { EXPECT_EQ(status.ok(), true) << status.message(); int bitmap_size = RoundUpNumi64(max_slots) * 8; - std::unique_ptr bitmap(new uint8_t[bitmap_size]); - memset(bitmap.get(), 0, bitmap_size); + std::vector bitmap(bitmap_size); - arrow::BitUtil::SetBit(bitmap.get(), 0); - arrow::BitUtil::SetBit(bitmap.get(), 5); - arrow::BitUtil::SetBit(bitmap.get(), 121); - arrow::BitUtil::SetBit(bitmap.get(), 220); + arrow::BitUtil::SetBit(&bitmap[0], 0); + arrow::BitUtil::SetBit(&bitmap[0], 5); + arrow::BitUtil::SetBit(&bitmap[0], 121); + arrow::BitUtil::SetBit(&bitmap[0], 220); - status = selection->PopulateFromBitMap(bitmap.get(), bitmap_size, max_slots - 1); + status = selection->PopulateFromBitMap(&bitmap[0], bitmap_size, max_slots - 1); EXPECT_EQ(status.ok(), true) << status.message(); EXPECT_EQ(selection->GetNumSlots(), 3); @@ -127,15 +127,14 @@ TEST_F(TestSelectionVector, TestInt16PopulateFromBitMapNegative) { EXPECT_EQ(status.ok(), true) << status.message(); int bitmap_size = 16; - std::unique_ptr bitmap(new uint8_t[bitmap_size]); - memset(bitmap.get(), 0, bitmap_size); + std::vector bitmap(bitmap_size); - arrow::BitUtil::SetBit(bitmap.get(), 0); - arrow::BitUtil::SetBit(bitmap.get(), 1); - arrow::BitUtil::SetBit(bitmap.get(), 2); + arrow::BitUtil::SetBit(&bitmap[0], 0); + arrow::BitUtil::SetBit(&bitmap[0], 1); + arrow::BitUtil::SetBit(&bitmap[0], 2); // The bitmap has three set bits, whereas the selection vector has capacity for only 2. 
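// --- Aside (illustrative, not part of this patch) ---
// The loop in PopulateFromBitMap above uses the classic idiom `word & -word`, which
// isolates the *lowest* set bit (the variable name `highest_only` notwithstanding);
// counting trailing zeros then gives that bit's position. A minimal standalone sketch,
// with a local stand-in for arrow::BitUtil::CountTrailingZeros and the negation written
// as `~word + 1` so MSVC does not warn about negating an unsigned type:
#include <cstdint>
#include <vector>

static inline int CountTrailingZeros64(uint64_t v) { return __builtin_ctzll(v); }  // GCC/Clang builtin

static std::vector<int> SetBitPositions(uint64_t word) {
  std::vector<int> positions;
  while (word != 0) {
    uint64_t lowest_only = word & (~word + 1);  // two's-complement trick: lowest set bit
    positions.push_back(CountTrailingZeros64(lowest_only));
    word ^= lowest_only;  // clear the bit we just recorded
  }
  return positions;
}
// e.g. SetBitPositions(0b10010) returns {1, 4}.
// --- End aside ---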
- status = selection->PopulateFromBitMap(bitmap.get(), bitmap_size, 2); + status = selection->PopulateFromBitMap(&bitmap[0], bitmap_size, 2); EXPECT_EQ(status.IsInvalid(), true); } @@ -175,15 +174,14 @@ TEST_F(TestSelectionVector, TestInt32PopulateFromBitMap) { EXPECT_EQ(status.ok(), true) << status.message(); int bitmap_size = RoundUpNumi64(max_slots) * 8; - std::unique_ptr bitmap(new uint8_t[bitmap_size]); - memset(bitmap.get(), 0, bitmap_size); + std::vector bitmap(bitmap_size); - arrow::BitUtil::SetBit(bitmap.get(), 0); - arrow::BitUtil::SetBit(bitmap.get(), 5); - arrow::BitUtil::SetBit(bitmap.get(), 121); - arrow::BitUtil::SetBit(bitmap.get(), 220); + arrow::BitUtil::SetBit(&bitmap[0], 0); + arrow::BitUtil::SetBit(&bitmap[0], 5); + arrow::BitUtil::SetBit(&bitmap[0], 121); + arrow::BitUtil::SetBit(&bitmap[0], 220); - status = selection->PopulateFromBitMap(bitmap.get(), bitmap_size, max_slots - 1); + status = selection->PopulateFromBitMap(&bitmap[0], bitmap_size, max_slots - 1); EXPECT_EQ(status.ok(), true) << status.message(); EXPECT_EQ(selection->GetNumSlots(), 3); @@ -243,15 +241,14 @@ TEST_F(TestSelectionVector, TestInt64PopulateFromBitMap) { EXPECT_EQ(status.ok(), true) << status.message(); int bitmap_size = RoundUpNumi64(max_slots) * 8; - std::unique_ptr bitmap(new uint8_t[bitmap_size]); - memset(bitmap.get(), 0, bitmap_size); + std::vector bitmap(bitmap_size); - arrow::BitUtil::SetBit(bitmap.get(), 0); - arrow::BitUtil::SetBit(bitmap.get(), 5); - arrow::BitUtil::SetBit(bitmap.get(), 121); - arrow::BitUtil::SetBit(bitmap.get(), 220); + arrow::BitUtil::SetBit(&bitmap[0], 0); + arrow::BitUtil::SetBit(&bitmap[0], 5); + arrow::BitUtil::SetBit(&bitmap[0], 121); + arrow::BitUtil::SetBit(&bitmap[0], 220); - status = selection->PopulateFromBitMap(bitmap.get(), bitmap_size, max_slots - 1); + status = selection->PopulateFromBitMap(&bitmap[0], bitmap_size, max_slots - 1); EXPECT_EQ(status.ok(), true) << status.message(); EXPECT_EQ(selection->GetNumSlots(), 3); diff --git a/cpp/src/gandiva/tests/CMakeLists.txt b/cpp/src/gandiva/tests/CMakeLists.txt index ae600634e74a7..c81618e8ebf32 100644 --- a/cpp/src/gandiva/tests/CMakeLists.txt +++ b/cpp/src/gandiva/tests/CMakeLists.txt @@ -15,28 +15,25 @@ # specific language governing permissions and limitations # under the License. 
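// --- Aside (illustrative, not part of this patch) ---
// The test edits above replace `new uint8_t[n]` plus memset with std::vector<uint8_t>,
// whose elements are value-initialized to zero. A standalone sketch of the pattern;
// SetBitLocal is a hypothetical helper standing in for arrow::BitUtil::SetBit:
#include <cstdint>
#include <initializer_list>
#include <vector>

static inline void SetBitLocal(uint8_t* bits, int64_t i) {
  bits[i / 8] |= static_cast<uint8_t>(1u << (i % 8));
}

static std::vector<uint8_t> MakeBitmap(int64_t num_bits,
                                       std::initializer_list<int64_t> set_positions) {
  std::vector<uint8_t> bitmap((num_bits + 7) / 8);  // zero-filled; no memset needed
  for (int64_t pos : set_positions) SetBitLocal(bitmap.data(), pos);
  return bitmap;
}
// usage mirroring the tests: auto bitmap = MakeBitmap(224, {0, 5, 121, 220});
// --- End aside ---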
-project(gandiva) +ADD_GANDIVA_TEST(filter_test) +ADD_GANDIVA_TEST(projector_test) +ADD_GANDIVA_TEST(projector_build_validation_test) +ADD_GANDIVA_TEST(if_expr_test) +ADD_GANDIVA_TEST(literal_test) +ADD_GANDIVA_TEST(boolean_expr_test) +ADD_GANDIVA_TEST(binary_test) +ADD_GANDIVA_TEST(date_time_test) +ADD_GANDIVA_TEST(to_string_test) +ADD_GANDIVA_TEST(hash_test) +ADD_GANDIVA_TEST(in_expr_test) +ADD_GANDIVA_TEST(null_validity_test) +ADD_GANDIVA_TEST(decimal_test) +ADD_GANDIVA_TEST(decimal_single_test) -foreach(lib_type "shared" "static") - add_gandiva_integ_test(filter_test.cc gandiva_${lib_type}) - add_gandiva_integ_test(projector_test.cc gandiva_${lib_type}) - add_gandiva_integ_test(if_expr_test.cc gandiva_${lib_type}) - add_gandiva_integ_test(literal_test.cc gandiva_${lib_type}) - add_gandiva_integ_test(projector_build_validation_test.cc gandiva_${lib_type}) - add_gandiva_integ_test(boolean_expr_test.cc gandiva_${lib_type}) - add_gandiva_integ_test(utf8_test.cc gandiva_${lib_type}) - add_gandiva_integ_test(binary_test.cc gandiva_${lib_type}) - add_gandiva_integ_test(date_time_test.cc gandiva_${lib_type}) - add_gandiva_integ_test(to_string_test.cc gandiva_${lib_type}) - add_gandiva_integ_test(hash_test.cc gandiva_${lib_type}) - add_gandiva_integ_test(in_expr_test.cc gandiva_${lib_type}) - add_gandiva_integ_test(null_validity_test.cc gandiva_${lib_type}) -endforeach(lib_type) - -set(GANDIVA_BENCHMARK_LINK_LIBRARIES - gandiva_static -) +ADD_GANDIVA_TEST(projector_test_static + SOURCES projector_test.cc + USE_STATIC_LINKING) ADD_ARROW_BENCHMARK(micro_benchmarks PREFIX "gandiva" - EXTRA_LINK_LIBS ${GANDIVA_BENCHMARK_LINK_LIBRARIES}) + EXTRA_LINK_LIBS gandiva_static) diff --git a/cpp/src/gandiva/tests/binary_test.cc b/cpp/src/gandiva/tests/binary_test.cc index d5d99db910b9d..6ac3c5155196e 100644 --- a/cpp/src/gandiva/tests/binary_test.cc +++ b/cpp/src/gandiva/tests/binary_test.cc @@ -61,7 +61,7 @@ TEST_F(TestBinary, TestSimple) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = Projector::Make(schema, {expr}, &projector); + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.ok()) << status.message(); // Create a row-batch with some sample data diff --git a/cpp/src/gandiva/tests/boolean_expr_test.cc b/cpp/src/gandiva/tests/boolean_expr_test.cc index 3351ab3ccf3ff..9226f357159c6 100644 --- a/cpp/src/gandiva/tests/boolean_expr_test.cc +++ b/cpp/src/gandiva/tests/boolean_expr_test.cc @@ -60,7 +60,7 @@ TEST_F(TestBooleanExpr, SimpleAnd) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = Projector::Make(schema, {expr}, &projector); + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.ok()); // FALSE_VALID && ? => FALSE_VALID @@ -133,7 +133,7 @@ TEST_F(TestBooleanExpr, SimpleOr) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = Projector::Make(schema, {expr}, &projector); + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.ok()); // TRUE_VALID && ? => TRUE_VALID @@ -210,7 +210,7 @@ TEST_F(TestBooleanExpr, AndThree) { // Build a projector for the expressions. 
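// --- Aside (illustrative, not part of this patch) ---
// The recurring edit in these test files is that Projector::Make and Filter::Make now
// take an explicit configuration argument (TestConfiguration()). A hedged sketch of
// the shape of such a helper; the type and the choice to share a single instance are
// assumptions for illustration, not the contents of gandiva's test_util.h:
#include <memory>

struct Configuration {};  // stand-in for gandiva::Configuration

static std::shared_ptr<Configuration> TestConfiguration() {
  // Handing every Make() call an equivalent configuration keeps the cache-identity
  // tests meaningful: only schema/expression differences should produce new objects.
  static auto config = std::make_shared<Configuration>();
  return config;
}
// --- End aside ---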
std::shared_ptr projector; - Status status = Projector::Make(schema, {expr}, &projector); + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.ok()); int num_records = 8; @@ -257,7 +257,7 @@ TEST_F(TestBooleanExpr, OrThree) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = Projector::Make(schema, {expr}, &projector); + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.ok()); int num_records = 8; @@ -317,7 +317,7 @@ TEST_F(TestBooleanExpr, BooleanAndInsideIf) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = Projector::Make(schema, {expr}, &projector); + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.ok()); int num_records = 4; @@ -368,7 +368,7 @@ TEST_F(TestBooleanExpr, IfInsideBooleanAnd) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = Projector::Make(schema, {expr}, &projector); + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.ok()); int num_records = 4; diff --git a/cpp/src/gandiva/tests/date_time_test.cc b/cpp/src/gandiva/tests/date_time_test.cc index 3914558d716c7..7867a9500fddb 100644 --- a/cpp/src/gandiva/tests/date_time_test.cc +++ b/cpp/src/gandiva/tests/date_time_test.cc @@ -57,7 +57,7 @@ int64_t MillisSince(time_t base_line, int32_t yy, int32_t mm, int32_t dd, int32_ given_ts.tm_min = min; given_ts.tm_sec = sec; - return (lround(difftime(mktime(&given_ts), base_line)) * 1000 + millis); + return (static_cast(difftime(mktime(&given_ts), base_line)) * 1000 + millis); } TEST_F(TestProjector, TestIsNull) { @@ -73,7 +73,8 @@ TEST_F(TestProjector, TestIsNull) { auto isnotnull_expr = TreeExprBuilder::MakeExpression("isnotnull", {t0}, b0); std::shared_ptr projector; - Status status = Projector::Make(schema, {isnull_expr, isnotnull_expr}, &projector); + auto status = Projector::Make(schema, {isnull_expr, isnotnull_expr}, + TestConfiguration(), &projector); ASSERT_TRUE(status.ok()); int num_records = 4; @@ -126,8 +127,9 @@ TEST_F(TestProjector, TestDateTime) { auto ts2day_expr = TreeExprBuilder::MakeExpression("extractDay", {field2}, field_day); std::shared_ptr projector; - Status status = Projector::Make( - schema, {date2year_expr, date2month_expr, ts2month_expr, ts2day_expr}, &projector); + auto status = Projector::Make( + schema, {date2year_expr, date2month_expr, ts2month_expr, ts2day_expr}, + TestConfiguration(), &projector); ASSERT_TRUE(status.ok()); struct tm y1970; @@ -196,7 +198,8 @@ TEST_F(TestProjector, TestTime) { TreeExprBuilder::MakeExpression("extractHour", {field0}, field_hour); std::shared_ptr projector; - Status status = Projector::Make(schema, {time2min_expr, time2hour_expr}, &projector); + auto status = Projector::Make(schema, {time2min_expr, time2hour_expr}, + TestConfiguration(), &projector); ASSERT_TRUE(status.ok()); // create input data @@ -264,7 +267,7 @@ TEST_F(TestProjector, TestTimestampDiff) { std::shared_ptr projector; auto exprs = {diff_secs_expr, diff_mins_expr, diff_hours_expr, diff_days_expr, diff_weeks_expr, diff_months_expr, diff_quarters_expr, diff_years_expr}; - Status status = Projector::Make(schema, exprs, &projector); + auto status = Projector::Make(schema, exprs, TestConfiguration(), &projector); ASSERT_TRUE(status.ok()); struct tm y1970; @@ -337,7 +340,8 @@ TEST_F(TestProjector, TestMonthsBetween) { 
TreeExprBuilder::MakeExpression("months_between", {f0, f1}, output); std::shared_ptr projector; - Status status = Projector::Make(schema, {months_between_expr}, &projector); + auto status = + Projector::Make(schema, {months_between_expr}, TestConfiguration(), &projector); std::cout << status.message(); ASSERT_TRUE(status.ok()); diff --git a/cpp/src/gandiva/tests/decimal_single_test.cc b/cpp/src/gandiva/tests/decimal_single_test.cc new file mode 100644 index 0000000000000..776ef6efbd0d9 --- /dev/null +++ b/cpp/src/gandiva/tests/decimal_single_test.cc @@ -0,0 +1,224 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include +#include "arrow/memory_pool.h" +#include "arrow/status.h" + +#include "gandiva/decimal_scalar.h" +#include "gandiva/decimal_type_util.h" +#include "gandiva/projector.h" +#include "gandiva/tests/test_util.h" +#include "gandiva/tree_expr_builder.h" + +using arrow::Decimal128; + +namespace gandiva { + +#define EXPECT_DECIMAL_SUM_EQUALS(x, y, expected, actual) \ + EXPECT_EQ(expected, actual) << (x).ToString() << " + " << (y).ToString() \ + << " expected : " << (expected).ToString() \ + << " actual : " << (actual).ToString(); + +DecimalScalar128 decimal_literal(const char* value, int precision, int scale) { + std::string value_string = std::string(value); + return DecimalScalar128(value_string, precision, scale); +} + +class TestDecimalOps : public ::testing::Test { + public: + void SetUp() { pool_ = arrow::default_memory_pool(); } + + ArrayPtr MakeDecimalVector(const DecimalScalar128& in); + void AddAndVerify(const DecimalScalar128& x, const DecimalScalar128& y, + const DecimalScalar128& expected); + + protected: + arrow::MemoryPool* pool_; +}; + +ArrayPtr TestDecimalOps::MakeDecimalVector(const DecimalScalar128& in) { + std::vector ret; + + Decimal128 decimal_value = in.value(); + + auto decimal_type = std::make_shared(in.precision(), in.scale()); + return MakeArrowArrayDecimal(decimal_type, {decimal_value}, {true}); +} + +void TestDecimalOps::AddAndVerify(const DecimalScalar128& x, const DecimalScalar128& y, + const DecimalScalar128& expected) { + auto x_type = std::make_shared(x.precision(), x.scale()); + auto y_type = std::make_shared(y.precision(), y.scale()); + auto field_x = field("x", x_type); + auto field_y = field("y", y_type); + auto schema = arrow::schema({field_x, field_y}); + + Decimal128TypePtr output_type; + auto status = DecimalTypeUtil::GetResultType(DecimalTypeUtil::kOpAdd, {x_type, y_type}, + &output_type); + EXPECT_OK(status); + + // output fields + auto res = field("res", output_type); + + // build expression : x + y + auto expr = TreeExprBuilder::MakeExpression("add", {field_x, field_y}, res); + + // Build a projector for the expression. 
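// --- Aside (illustrative, not part of this patch) ---
// The expected types in the TestAdd cases below are consistent with the usual SQL
// decimal-addition rule: scale = max(s1, s2), precision = max(p1 - s1, p2 - s2) +
// scale + 1, capped at 38 digits with the scale reduced but kept >= 6. This rule is
// inferred from the test expectations, not copied from decimal_type_util.h, so treat
// it as an assumption. A standalone sketch:
#include <algorithm>

struct DecimalTypeSketch { int precision; int scale; };

static DecimalTypeSketch AddResultType(DecimalTypeSketch a, DecimalTypeSketch b) {
  const int kMaxPrecision = 38;     // decimal digits that fit in 128 bits
  const int kMinAdjustedScale = 6;  // floor for the scale when precision overflows
  int scale = std::max(a.scale, b.scale);
  int whole = std::max(a.precision - a.scale, b.precision - b.scale);
  int precision = whole + scale + 1;  // +1 digit for a possible carry
  if (precision > kMaxPrecision) {
    scale = std::max(scale - (precision - kMaxPrecision), kMinAdjustedScale);
    precision = kMaxPrecision;
  }
  return {precision, scale};
}
// e.g. AddResultType({30, 3}, {30, 2}) -> {32, 3}; AddResultType({38, 5}, {38, 7}) -> {38, 6}.
// --- End aside ---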
+ std::shared_ptr projector; + status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); + EXPECT_OK(status); + + // Create a row-batch with some sample data + auto array_a = MakeDecimalVector(x); + auto array_b = MakeDecimalVector(y); + + // prepare input record batch + auto in_batch = arrow::RecordBatch::Make(schema, 1 /*num_records*/, {array_a, array_b}); + + // Evaluate expression + arrow::ArrayVector outputs; + status = projector->Evaluate(*in_batch, pool_, &outputs); + EXPECT_OK(status); + + // Validate results + auto out_array = dynamic_cast(outputs[0].get()); + const Decimal128 out_value(out_array->GetValue(0)); + + auto dtype = dynamic_cast(out_array->type().get()); + std::string value_string = out_value.ToString(0); + DecimalScalar128 actual{value_string, dtype->precision(), dtype->scale()}; + + EXPECT_DECIMAL_SUM_EQUALS(x, y, expected, actual); +} + +TEST_F(TestDecimalOps, TestAdd) { + // fast-path + AddAndVerify(decimal_literal("201", 30, 3), // x + decimal_literal("301", 30, 3), // y + decimal_literal("502", 31, 3)); // expected + + AddAndVerify(decimal_literal("201", 30, 3), // x + decimal_literal("301", 30, 2), // y + decimal_literal("3211", 32, 3)); // expected + + AddAndVerify(decimal_literal("201", 30, 3), // x + decimal_literal("301", 30, 4), // y + decimal_literal("2311", 32, 4)); // expected + + // max precision, but no overflow + AddAndVerify(decimal_literal("201", 38, 3), // x + decimal_literal("301", 38, 3), // y + decimal_literal("502", 38, 3)); // expected + + AddAndVerify(decimal_literal("201", 38, 3), // x + decimal_literal("301", 38, 2), // y + decimal_literal("3211", 38, 3)); // expected + + AddAndVerify(decimal_literal("201", 38, 3), // x + decimal_literal("301", 38, 4), // y + decimal_literal("2311", 38, 4)); // expected + + AddAndVerify(decimal_literal("201", 38, 3), // x + decimal_literal("301", 38, 7), // y + decimal_literal("201030", 38, 6)); // expected + + AddAndVerify(decimal_literal("1201", 38, 3), // x + decimal_literal("1801", 38, 3), // y + decimal_literal("3002", 38, 3)); // carry-over from fractional + + // max precision + AddAndVerify(decimal_literal("09999999999999999999999999999999000000", 38, 5), // x + decimal_literal("100", 38, 7), // y + decimal_literal("99999999999999999999999999999990000010", 38, 6)); + + AddAndVerify(decimal_literal("-09999999999999999999999999999999000000", 38, 5), // x + decimal_literal("100", 38, 7), // y + decimal_literal("-99999999999999999999999999999989999990", 38, 6)); + + AddAndVerify(decimal_literal("09999999999999999999999999999999000000", 38, 5), // x + decimal_literal("-100", 38, 7), // y + decimal_literal("99999999999999999999999999999989999990", 38, 6)); + + AddAndVerify(decimal_literal("-09999999999999999999999999999999000000", 38, 5), // x + decimal_literal("-100", 38, 7), // y + decimal_literal("-99999999999999999999999999999990000010", 38, 6)); + + AddAndVerify(decimal_literal("09999999999999999999999999999999999999", 38, 6), // x + decimal_literal("89999999999999999999999999999999999999", 38, 7), // y + decimal_literal("18999999999999999999999999999999999999", 38, 6)); + + // Both -ve + AddAndVerify(decimal_literal("-201", 30, 3), // x + decimal_literal("-301", 30, 2), // y + decimal_literal("-3211", 32, 3)); // expected + + AddAndVerify(decimal_literal("-201", 38, 3), // x + decimal_literal("-301", 38, 4), // y + decimal_literal("-2311", 38, 4)); // expected + + // Mix of +ve and -ve + AddAndVerify(decimal_literal("-201", 30, 3), // x + decimal_literal("301", 30, 2), // y + 
decimal_literal("2809", 32, 3)); // expected + + AddAndVerify(decimal_literal("-201", 38, 3), // x + decimal_literal("301", 38, 4), // y + decimal_literal("-1709", 38, 4)); // expected + + AddAndVerify(decimal_literal("201", 38, 3), // x + decimal_literal("-301", 38, 7), // y + decimal_literal("200970", 38, 6)); // expected + + AddAndVerify(decimal_literal("-1901", 38, 4), // x + decimal_literal("1801", 38, 4), // y + decimal_literal("-100", 38, 4)); // expected + + AddAndVerify(decimal_literal("1801", 38, 4), // x + decimal_literal("-1901", 38, 4), // y + decimal_literal("-100", 38, 4)); // expected + + // rounding +ve + AddAndVerify(decimal_literal("1000999", 38, 6), // x + decimal_literal("10000999", 38, 7), // y + decimal_literal("2001099", 38, 6)); + + AddAndVerify(decimal_literal("1000999", 38, 6), // x + decimal_literal("10000995", 38, 7), // y + decimal_literal("2001099", 38, 6)); + + AddAndVerify(decimal_literal("1000999", 38, 6), // x + decimal_literal("10000992", 38, 7), // y + decimal_literal("2001098", 38, 6)); + + // rounding -ve + AddAndVerify(decimal_literal("-1000999", 38, 6), // x + decimal_literal("-10000999", 38, 7), // y + decimal_literal("-2001099", 38, 6)); + + AddAndVerify(decimal_literal("-1000999", 38, 6), // x + decimal_literal("-10000995", 38, 7), // y + decimal_literal("-2001099", 38, 6)); + + AddAndVerify(decimal_literal("-1000999", 38, 6), // x + decimal_literal("-10000992", 38, 7), // y + decimal_literal("-2001098", 38, 6)); +} +} // namespace gandiva diff --git a/cpp/src/gandiva/tests/decimal_test.cc b/cpp/src/gandiva/tests/decimal_test.cc new file mode 100644 index 0000000000000..da93b0e2d9da6 --- /dev/null +++ b/cpp/src/gandiva/tests/decimal_test.cc @@ -0,0 +1,237 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include + +#include +#include "arrow/memory_pool.h" +#include "arrow/status.h" +#include "arrow/util/decimal.h" + +#include "gandiva/decimal_type_util.h" +#include "gandiva/projector.h" +#include "gandiva/tests/test_util.h" +#include "gandiva/tree_expr_builder.h" + +using arrow::Decimal128; + +namespace gandiva { + +class TestDecimal : public ::testing::Test { + public: + void SetUp() { pool_ = arrow::default_memory_pool(); } + + std::vector MakeDecimalVector(std::vector values, + int32_t scale); + + protected: + arrow::MemoryPool* pool_; +}; + +std::vector TestDecimal::MakeDecimalVector(std::vector values, + int32_t scale) { + std::vector ret; + for (auto str : values) { + Decimal128 str_value; + int32_t str_precision; + int32_t str_scale; + + auto status = Decimal128::FromString(str, &str_value, &str_precision, &str_scale); + DCHECK_OK(status); + + Decimal128 scaled_value; + status = str_value.Rescale(str_scale, scale, &scaled_value); + ret.push_back(scaled_value); + } + return ret; +} + +TEST_F(TestDecimal, TestSimple) { + // schema for input fields + constexpr int32_t precision = 36; + constexpr int32_t scale = 18; + auto decimal_type = std::make_shared(precision, scale); + auto field_a = field("a", decimal_type); + auto field_b = field("b", decimal_type); + auto field_c = field("c", decimal_type); + auto schema = arrow::schema({field_a, field_b, field_c}); + + Decimal128TypePtr add2_type; + auto status = DecimalTypeUtil::GetResultType(DecimalTypeUtil::kOpAdd, + {decimal_type, decimal_type}, &add2_type); + + Decimal128TypePtr output_type; + status = DecimalTypeUtil::GetResultType(DecimalTypeUtil::kOpAdd, + {add2_type, decimal_type}, &output_type); + + // output fields + auto res = field("res0", output_type); + + // build expression : a + b + c + auto node_a = TreeExprBuilder::MakeField(field_a); + auto node_b = TreeExprBuilder::MakeField(field_b); + auto node_c = TreeExprBuilder::MakeField(field_c); + auto add2 = TreeExprBuilder::MakeFunction("add", {node_a, node_b}, add2_type); + auto add3 = TreeExprBuilder::MakeFunction("add", {add2, node_c}, output_type); + auto expr = TreeExprBuilder::MakeExpression(add3, res); + + // Build a projector for the expression. 
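// --- Aside (illustrative, not part of this patch) ---
// MakeDecimalVector above parses each string with Decimal128::FromString and then
// rescales it to the column's scale. "Rescaling up" a decimal integer representation
// just multiplies by a power of ten. A standalone sketch on int64_t, ignoring
// overflow for brevity:
#include <cstdint>

// A value v at scale `from` equals v * 10^(to - from) at scale `to` (to >= from assumed).
static int64_t RescaleUp(int64_t v, int32_t from, int32_t to) {
  for (int32_t i = from; i < to; ++i) v *= 10;
  return v;
}
// e.g. "2.5" parses as value 25 at scale 1; RescaleUp(25, 1, 18) is its scale-18 form.
// --- End aside ---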
+ std::shared_ptr projector; + status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); + DCHECK_OK(status); + + // Create a row-batch with some sample data + int num_records = 4; + auto array_a = + MakeArrowArrayDecimal(decimal_type, MakeDecimalVector({"1", "2", "3", "4"}, scale), + {false, true, true, true}); + auto array_b = + MakeArrowArrayDecimal(decimal_type, MakeDecimalVector({"2", "3", "4", "5"}, scale), + {false, true, true, true}); + auto array_c = + MakeArrowArrayDecimal(decimal_type, MakeDecimalVector({"3", "4", "5", "6"}, scale), + {true, true, true, true}); + + // prepare input record batch + auto in_batch = + arrow::RecordBatch::Make(schema, num_records, {array_a, array_b, array_c}); + + auto expected = + MakeArrowArrayDecimal(output_type, MakeDecimalVector({"6", "9", "12", "15"}, scale), + {false, true, true, true}); + + // Evaluate expression + arrow::ArrayVector outputs; + status = projector->Evaluate(*in_batch, pool_, &outputs); + DCHECK_OK(status); + + // Validate results + EXPECT_ARROW_ARRAY_EQUALS(expected, outputs[0]); +} + +TEST_F(TestDecimal, TestLiteral) { + // schema for input fields + constexpr int32_t precision = 36; + constexpr int32_t scale = 18; + auto decimal_type = std::make_shared(precision, scale); + auto field_a = field("a", decimal_type); + auto schema = arrow::schema({ + field_a, + }); + + Decimal128TypePtr add2_type; + auto status = DecimalTypeUtil::GetResultType(DecimalTypeUtil::kOpAdd, + {decimal_type, decimal_type}, &add2_type); + + // output fields + auto res = field("res0", add2_type); + + // build expression : a + 0.6 (decimal literal) + auto node_a = TreeExprBuilder::MakeField(field_a); + static std::string decimal_point_six = "6"; + DecimalScalar128 literal(decimal_point_six, 2, 1); + auto node_b = TreeExprBuilder::MakeDecimalLiteral(literal); + auto add2 = TreeExprBuilder::MakeFunction("add", {node_a, node_b}, add2_type); + auto expr = TreeExprBuilder::MakeExpression(add2, res); + + // Build a projector for the expression. + std::shared_ptr projector; + status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); + DCHECK_OK(status); + + // Create a row-batch with some sample data + int num_records = 4; + auto array_a = + MakeArrowArrayDecimal(decimal_type, MakeDecimalVector({"1", "2", "3", "4"}, scale), + {false, true, true, true}); + + // prepare input record batch + auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a}); + + auto expected = MakeArrowArrayDecimal( + add2_type, MakeDecimalVector({"1.6", "2.6", "3.6", "4.6"}, scale), + {false, true, true, true}); + + // Evaluate expression + arrow::ArrayVector outputs; + status = projector->Evaluate(*in_batch, pool_, &outputs); + DCHECK_OK(status); + + // Validate results + EXPECT_ARROW_ARRAY_EQUALS(expected, outputs[0]); +} + +TEST_F(TestDecimal, TestIfElse) { + // schema for input fields + constexpr int32_t precision = 36; + constexpr int32_t scale = 18; + auto decimal_type = std::make_shared(precision, scale); + auto field_a = field("a", decimal_type); + auto field_b = field("b", decimal_type); + auto field_c = field("c", arrow::boolean()); + auto schema = arrow::schema({field_a, field_b, field_c}); + + // output fields + auto field_result = field("res", decimal_type); + + // build expression.
+ // if (c) + // a + // else + // b + auto node_a = TreeExprBuilder::MakeField(field_a); + auto node_b = TreeExprBuilder::MakeField(field_b); + auto node_c = TreeExprBuilder::MakeField(field_c); + auto if_node = TreeExprBuilder::MakeIf(node_c, node_a, node_b, decimal_type); + + auto expr = TreeExprBuilder::MakeExpression(if_node, field_result); + + // Build a projector for the expressions. + std::shared_ptr projector; + Status status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); + DCHECK_OK(status); + + // Create a row-batch with some sample data + int num_records = 4; + auto array_a = + MakeArrowArrayDecimal(decimal_type, MakeDecimalVector({"1", "2", "3", "4"}, scale), + {false, true, true, true}); + auto array_b = + MakeArrowArrayDecimal(decimal_type, MakeDecimalVector({"2", "3", "4", "5"}, scale), + {true, true, true, true}); + + auto array_c = MakeArrowArrayBool({true, false, true, false}, {true, true, true, true}); + + // expected output + auto exp = + MakeArrowArrayDecimal(decimal_type, MakeDecimalVector({"0", "3", "3", "5"}, scale), + {false, true, true, true}); + + // prepare input record batch + auto in_batch = + arrow::RecordBatch::Make(schema, num_records, {array_a, array_b, array_c}); + + // Evaluate expression + arrow::ArrayVector outputs; + status = projector->Evaluate(*in_batch, pool_, &outputs); + DCHECK_OK(status); + + // Validate results + EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0)); +} + +} // namespace gandiva diff --git a/cpp/src/gandiva/tests/filter_test.cc b/cpp/src/gandiva/tests/filter_test.cc index f95cdcc3fef9c..ee60388d5dc1f 100644 --- a/cpp/src/gandiva/tests/filter_test.cc +++ b/cpp/src/gandiva/tests/filter_test.cc @@ -50,14 +50,15 @@ TEST_F(TestFilter, TestFilterCache) { auto less_than_10 = TreeExprBuilder::MakeFunction("less_than", {sum_func, literal_10}, arrow::boolean()); auto condition = TreeExprBuilder::MakeCondition(less_than_10); + auto configuration = TestConfiguration(); std::shared_ptr filter; - Status status = Filter::Make(schema, condition, &filter); + auto status = Filter::Make(schema, condition, configuration, &filter); EXPECT_TRUE(status.ok()); // same schema and condition, should return the same filter as above. 
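// --- Aside (illustrative, not part of this patch) ---
// The filter cache test around this point expects the same schema + condition +
// configuration to return the very same Filter instance, and any difference to yield
// a new one. That suggests an object cache keyed on those inputs. A minimal sketch
// with a hypothetical string key; the real gandiva cache key is certainly richer:
#include <map>
#include <memory>
#include <string>

struct FilterSketch {};

static std::shared_ptr<FilterSketch> MakeCachedFilter(const std::string& key) {
  static std::map<std::string, std::weak_ptr<FilterSketch>> cache;
  if (auto hit = cache[key].lock()) return hit;  // cache hit: identical instance
  auto fresh = std::make_shared<FilterSketch>();
  cache[key] = fresh;
  return fresh;
}
// MakeCachedFilter("schema|cond|config") twice returns pointers to the same object.
// --- End aside ---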
std::shared_ptr cached_filter; - status = Filter::Make(schema, condition, &cached_filter); + status = Filter::Make(schema, condition, configuration, &cached_filter); EXPECT_TRUE(status.ok()); EXPECT_TRUE(cached_filter.get() == filter.get()); @@ -65,7 +66,8 @@ TEST_F(TestFilter, TestFilterCache) { auto field2 = field("f2", int32()); auto different_schema = arrow::schema({field0, field1, field2}); std::shared_ptr should_be_new_filter; - status = Filter::Make(different_schema, condition, &should_be_new_filter); + status = + Filter::Make(different_schema, condition, configuration, &should_be_new_filter); EXPECT_TRUE(status.ok()); EXPECT_TRUE(cached_filter.get() != should_be_new_filter.get()); @@ -74,7 +76,7 @@ TEST_F(TestFilter, TestFilterCache) { "greater_than", {sum_func, literal_10}, arrow::boolean()); auto new_condition = TreeExprBuilder::MakeCondition(greater_than_10); std::shared_ptr should_be_new_filter1; - status = Filter::Make(schema, new_condition, &should_be_new_filter1); + status = Filter::Make(schema, new_condition, configuration, &should_be_new_filter1); EXPECT_TRUE(status.ok()); EXPECT_TRUE(cached_filter.get() != should_be_new_filter1.get()); } @@ -96,7 +98,7 @@ TEST_F(TestFilter, TestSimple) { auto condition = TreeExprBuilder::MakeCondition(less_than_10); std::shared_ptr filter; - Status status = Filter::Make(schema, condition, &filter); + auto status = Filter::Make(schema, condition, TestConfiguration(), &filter); EXPECT_TRUE(status.ok()); // Create a row-batch with some sample data @@ -134,7 +136,7 @@ TEST_F(TestFilter, TestSimpleCustomConfig) { std::shared_ptr config = config_builder.build(); std::shared_ptr filter; - Status status = Filter::Make(schema, condition, &filter); + auto status = Filter::Make(schema, condition, TestConfiguration(), &filter); EXPECT_TRUE(status.ok()); // Create a row-batch with some sample data @@ -168,7 +170,7 @@ TEST_F(TestFilter, TestZeroCopy) { auto condition = TreeExprBuilder::MakeCondition("isnotnull", {field0}); std::shared_ptr filter; - Status status = Filter::Make(schema, condition, &filter); + auto status = Filter::Make(schema, condition, TestConfiguration(), &filter); EXPECT_TRUE(status.ok()); // Create a row-batch with some sample data @@ -208,7 +210,7 @@ TEST_F(TestFilter, TestZeroCopyNegative) { auto condition = TreeExprBuilder::MakeCondition("isnotnull", {field0}); std::shared_ptr filter; - Status status = Filter::Make(schema, condition, &filter); + auto status = Filter::Make(schema, condition, TestConfiguration(), &filter); EXPECT_TRUE(status.ok()); // Create a row-batch with some sample data @@ -265,7 +267,7 @@ TEST_F(TestFilter, TestSimpleSVInt32) { auto condition = TreeExprBuilder::MakeCondition(less_than_10); std::shared_ptr filter; - Status status = Filter::Make(schema, condition, &filter); + auto status = Filter::Make(schema, condition, TestConfiguration(), &filter); EXPECT_TRUE(status.ok()); // Create a row-batch with some sample data diff --git a/cpp/src/gandiva/tests/generate_data.h b/cpp/src/gandiva/tests/generate_data.h index 01665b8ee17c5..398057510cb08 100644 --- a/cpp/src/gandiva/tests/generate_data.h +++ b/cpp/src/gandiva/tests/generate_data.h @@ -19,6 +19,8 @@ #include #include +#include "arrow/util/decimal.h" + #ifndef GANDIVA_GENERATE_DATA_H #define GANDIVA_GENERATE_DATA_H @@ -79,6 +81,24 @@ class Int64DataGenerator : public DataGenerator { Random random_; }; +class Decimal128DataGenerator : public DataGenerator { + public: + explicit Decimal128DataGenerator(bool large) : large_(large) {} + + arrow::Decimal128 
GenerateData() { + uint64_t low = random_.next(); + int64_t high = random_.next(); + if (large_) { + high += (1ull << 62); + } + return arrow::Decimal128(high, low); + } + + protected: + bool large_; + Random random_; +}; + class FastUtf8DataGenerator : public DataGenerator { public: explicit FastUtf8DataGenerator(int max_len) : max_len_(max_len), cur_char_('a') {} diff --git a/cpp/src/gandiva/tests/hash_test.cc b/cpp/src/gandiva/tests/hash_test.cc index 96f92284a5ca1..afaa885dfe26b 100644 --- a/cpp/src/gandiva/tests/hash_test.cc +++ b/cpp/src/gandiva/tests/hash_test.cc @@ -61,7 +61,8 @@ TEST_F(TestHash, TestSimple) { // Build a projector for the expression. std::shared_ptr projector; - Status status = Projector::Make(schema, {expr_0, expr_1}, &projector); + auto status = + Projector::Make(schema, {expr_0, expr_1}, TestConfiguration(), &projector); EXPECT_TRUE(status.ok()) << status.message(); // Create a row-batch with some sample data @@ -113,7 +114,8 @@ TEST_F(TestHash, TestBuf) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = Projector::Make(schema, {expr_0, expr_1}, &projector); + auto status = + Projector::Make(schema, {expr_0, expr_1}, TestConfiguration(), &projector); EXPECT_TRUE(status.ok()) << status.message(); // Create a row-batch with some sample data diff --git a/cpp/src/gandiva/tests/huge_table_test.cc b/cpp/src/gandiva/tests/huge_table_test.cc index bffcb1994707f..cecf290a1439f 100644 --- a/cpp/src/gandiva/tests/huge_table_test.cc +++ b/cpp/src/gandiva/tests/huge_table_test.cc @@ -58,7 +58,7 @@ TEST_F(DISABLED_TestHugeProjector, SimpleTestSumHuge) { // Build expression auto sum_expr = TreeExprBuilder::MakeExpression("add", {field0, field1}, field_sum); std::shared_ptr projector; - Status status = Projector::Make(schema, {sum_expr}, &projector); + auto status = Projector::Make(schema, {sum_expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.ok()); // Create a row-batch with some sample data @@ -136,7 +136,7 @@ TEST_F(DISABLED_TestHugeFilter, TestSimpleHugeFilter) { auto condition = TreeExprBuilder::MakeCondition(less_than_50); std::shared_ptr filter; - Status status = Filter::Make(schema, condition, &filter); + auto status = Filter::Make(schema, condition, TestConfiguration(), &filter); EXPECT_TRUE(status.ok()); // prepare input record batch diff --git a/cpp/src/gandiva/tests/if_expr_test.cc b/cpp/src/gandiva/tests/if_expr_test.cc index 93b35673b9467..54b6d43b4df1c 100644 --- a/cpp/src/gandiva/tests/if_expr_test.cc +++ b/cpp/src/gandiva/tests/if_expr_test.cc @@ -61,7 +61,7 @@ TEST_F(TestIfExpr, TestSimple) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = Projector::Make(schema, {expr}, &projector); + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.ok()); // Create a row-batch with some sample data @@ -110,7 +110,7 @@ TEST_F(TestIfExpr, TestSimpleArithmetic) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = Projector::Make(schema, {expr}, &projector); + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.ok()); // Create a row-batch with some sample data @@ -165,7 +165,7 @@ TEST_F(TestIfExpr, TestNested) { // Build a projector for the expressions. 
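// --- Aside (illustrative, not part of this patch) ---
// Decimal128DataGenerator, added in generate_data.h above, builds a random 128-bit
// value from random 64-bit high and low halves; when `large_` is set it adds 1 << 62
// to the high half, pushing the magnitude up so benchmarks exercise the wide/slow
// arithmetic path. A standalone sketch using <random> instead of the test suite's
// Random helper:
#include <cstdint>
#include <random>

struct Decimal128Parts { int64_t high; uint64_t low; };

static Decimal128Parts GenerateDecimalParts(bool large, std::mt19937_64& rng) {
  uint64_t low = rng();
  int64_t high = static_cast<int64_t>(rng() >> 2);  // leave headroom so the add below cannot overflow
  if (large) high += (int64_t{1} << 62);            // force a large magnitude
  return {high, low};
}
// --- End aside ---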
std::shared_ptr projector; - Status status = Projector::Make(schema, {expr}, &projector); + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.ok()); // Create a row-batch with some sample data @@ -228,7 +228,7 @@ TEST_F(TestIfExpr, TestNestedInIf) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = Projector::Make(schema, {expr}, &projector); + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.ok()); // Create a row-batch with some sample data @@ -296,7 +296,7 @@ TEST_F(TestIfExpr, TestNestedInCondition) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = Projector::Make(schema, {expr}, &projector); + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.ok()); // Create a row-batch with some sample data @@ -353,7 +353,7 @@ TEST_F(TestIfExpr, TestBigNested) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = Projector::Make(schema, {expr}, &projector); + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.ok()); // Create a row-batch with some sample data diff --git a/cpp/src/gandiva/tests/in_expr_test.cc b/cpp/src/gandiva/tests/in_expr_test.cc index 13ef97cfb8814..2103874cb1e2c 100644 --- a/cpp/src/gandiva/tests/in_expr_test.cc +++ b/cpp/src/gandiva/tests/in_expr_test.cc @@ -51,7 +51,7 @@ TEST_F(TestIn, TestInSimple) { auto condition = TreeExprBuilder::MakeCondition(in_expr); std::shared_ptr filter; - Status status = Filter::Make(schema, condition, &filter); + auto status = Filter::Make(schema, condition, TestConfiguration(), &filter); EXPECT_TRUE(status.ok()); // Create a row-batch with some sample data @@ -88,7 +88,7 @@ TEST_F(TestIn, TestInString) { auto condition = TreeExprBuilder::MakeCondition(in_expr); std::shared_ptr filter; - Status status = Filter::Make(schema, condition, &filter); + auto status = Filter::Make(schema, condition, TestConfiguration(), &filter); EXPECT_TRUE(status.ok()); // Create a row-batch with some sample data @@ -125,7 +125,7 @@ TEST_F(TestIn, TestInStringValidationError) { auto condition = TreeExprBuilder::MakeCondition(in_expr); std::shared_ptr filter; - Status status = Filter::Make(schema, condition, &filter); + auto status = Filter::Make(schema, condition, TestConfiguration(), &filter); EXPECT_TRUE(status.IsExpressionValidationError()); std::string expected_error = "Evaluation expression for IN clause returns "; diff --git a/cpp/src/gandiva/tests/literal_test.cc b/cpp/src/gandiva/tests/literal_test.cc index ced66452a2d45..53323cb4e7cbb 100644 --- a/cpp/src/gandiva/tests/literal_test.cc +++ b/cpp/src/gandiva/tests/literal_test.cc @@ -88,8 +88,8 @@ TEST_F(TestLiteral, TestSimpleArithmetic) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = - Projector::Make(schema, {expr_a, expr_b, expr_c, expr_d, expr_e}, &projector); + auto status = Projector::Make(schema, {expr_a, expr_b, expr_c, expr_d, expr_e}, + TestConfiguration(), &projector); EXPECT_TRUE(status.ok()); // Create a row-batch with some sample data @@ -133,7 +133,7 @@ TEST_F(TestLiteral, TestLiteralHash) { // Build a projector for the expressions. 
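// --- Aside (illustrative, not part of this patch) ---
// These tests all assemble expressions the same way: fields and literals are leaf
// nodes, functions are interior nodes, and the finished tree is handed to
// Projector::Make or Filter::Make. A toy tree to make that shape concrete; every type
// and name here is hypothetical, not the gandiva TreeExprBuilder API:
#include <memory>
#include <string>
#include <utility>
#include <vector>

struct ExprNode {
  std::string op;                                   // "field:a", "literal:6", "add", ...
  std::vector<std::shared_ptr<ExprNode>> children;  // empty for leaves
};

static std::shared_ptr<ExprNode> MakeNode(std::string op,
                                          std::vector<std::shared_ptr<ExprNode>> kids = {}) {
  return std::make_shared<ExprNode>(ExprNode{std::move(op), std::move(kids)});
}

// add(add(a, b), c): the same shape as the a + b + c expression in TestSimple above.
static const auto kAdd3 = MakeNode(
    "add", {MakeNode("add", {MakeNode("field:a"), MakeNode("field:b")}), MakeNode("field:c")});
// --- End aside ---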
std::shared_ptr projector; - Status status = Projector::Make(schema, {expr}, &projector); + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.ok()) << status.message(); auto res1 = field("a", int64()); @@ -142,7 +142,7 @@ TEST_F(TestLiteral, TestLiteralHash) { // Build a projector for the expressions. std::shared_ptr projector1; - status = Projector::Make(schema, {expr1}, &projector1); + status = Projector::Make(schema, {expr1}, TestConfiguration(), &projector1); EXPECT_TRUE(status.ok()) << status.message(); EXPECT_TRUE(projector.get() != projector1.get()); } @@ -165,7 +165,7 @@ TEST_F(TestLiteral, TestNullLiteral) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = Projector::Make(schema, {expr}, &projector); + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.ok()) << status.message(); // Create a row-batch with some sample data @@ -207,7 +207,7 @@ TEST_F(TestLiteral, TestNullLiteralInIf) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = Projector::Make(schema, {expr}, &projector); + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.ok()) << status.message(); // Create a row-batch with some sample data diff --git a/cpp/src/gandiva/tests/micro_benchmarks.cc b/cpp/src/gandiva/tests/micro_benchmarks.cc index 7d844eb378bf8..e0794a233a2ce 100644 --- a/cpp/src/gandiva/tests/micro_benchmarks.cc +++ b/cpp/src/gandiva/tests/micro_benchmarks.cc @@ -19,6 +19,7 @@ #include "arrow/memory_pool.h" #include "arrow/status.h" #include "benchmark/benchmark.h" +#include "gandiva/decimal_type_util.h" #include "gandiva/projector.h" #include "gandiva/tests/test_util.h" #include "gandiva/tests/timed_evaluate.h" @@ -31,10 +32,6 @@ using arrow::int32; using arrow::int64; using arrow::utf8; -// TODO : the base numbers are from a mac. they need to be caliberated -// for the hardware used by travis. -float tolerance_ratio = 6.0; - static void TimedTestAdd3(benchmark::State& state) { // schema for input fields auto field0 = field("f0", int64()); @@ -56,7 +53,7 @@ static void TimedTestAdd3(benchmark::State& state) { auto sum_expr = TreeExprBuilder::MakeExpression(sum, field_sum); std::shared_ptr projector; - ASSERT_OK(Projector::Make(schema, {sum_expr}, &projector)); + ASSERT_OK(Projector::Make(schema, {sum_expr}, TestConfiguration(), &projector)); Int64DataGenerator data_generator; ProjectEvaluator evaluator(projector); @@ -99,7 +96,7 @@ static void TimedTestBigNested(benchmark::State& state) { // Build a projector for the expressions. 
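// --- Aside (illustrative, not part of this patch) ---
// The micro-benchmarks in this file follow the standard Google Benchmark pattern: a
// function taking benchmark::State, a timing loop over `state`, and registration via
// BENCHMARK(...) with MinTime/Unit options. A minimal self-contained example of that
// pattern, independent of gandiva:
#include <benchmark/benchmark.h>

#include <vector>

static void BM_VectorSum(benchmark::State& state) {
  std::vector<int> values(1024, 1);
  for (auto _ : state) {
    long sum = 0;
    for (int v : values) sum += v;
    benchmark::DoNotOptimize(sum);  // keep the compiler from eliding the loop
  }
}
BENCHMARK(BM_VectorSum)->MinTime(1.0)->Unit(benchmark::kMicrosecond);

BENCHMARK_MAIN();
// --- End aside ---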
std::shared_ptr projector; - ASSERT_OK(Projector::Make(schema, {expr}, &projector)); + ASSERT_OK(Projector::Make(schema, {expr}, TestConfiguration(), &projector)); BoundedInt32DataGenerator data_generator(250); ProjectEvaluator evaluator(projector); @@ -122,7 +119,7 @@ static void TimedTestExtractYear(benchmark::State& state) { auto expr = TreeExprBuilder::MakeExpression("extractYear", {field0}, field_res); std::shared_ptr projector; - ASSERT_OK(Projector::Make(schema, {expr}, &projector)); + ASSERT_OK(Projector::Make(schema, {expr}, TestConfiguration(), &projector)); Int64DataGenerator data_generator; ProjectEvaluator evaluator(projector); @@ -149,7 +146,7 @@ static void TimedTestFilterAdd2(benchmark::State& state) { auto condition = TreeExprBuilder::MakeCondition(less_than); std::shared_ptr filter; - ASSERT_OK(Filter::Make(schema, condition, &filter)); + ASSERT_OK(Filter::Make(schema, condition, TestConfiguration(), &filter)); Int64DataGenerator data_generator; FilterEvaluator evaluator(filter); @@ -173,7 +170,7 @@ static void TimedTestFilterLike(benchmark::State& state) { auto condition = TreeExprBuilder::MakeCondition(like_yellow); std::shared_ptr filter; - ASSERT_OK(Filter::Make(schema, condition, &filter)); + ASSERT_OK(Filter::Make(schema, condition, TestConfiguration(), &filter)); FastUtf8DataGenerator data_generator(32); FilterEvaluator evaluator(filter); @@ -199,7 +196,7 @@ static void TimedTestAllocs(benchmark::State& state) { auto expr = TreeExprBuilder::MakeExpression(length, field_res); std::shared_ptr projector; - ASSERT_OK(Projector::Make(schema, {expr}, &projector)); + ASSERT_OK(Projector::Make(schema, {expr}, TestConfiguration(), &projector)); FastUtf8DataGenerator data_generator(64); ProjectEvaluator evaluator(projector); @@ -237,7 +234,7 @@ static void TimedTestMultiOr(benchmark::State& state) { // Build a projector for the expressions. std::shared_ptr projector; - ASSERT_OK(Projector::Make(schema, {expr}, &projector)); + ASSERT_OK(Projector::Make(schema, {expr}, TestConfiguration(), &projector)); FastUtf8DataGenerator data_generator(250); ProjectEvaluator evaluator(projector); @@ -269,7 +266,7 @@ static void TimedTestInExpr(benchmark::State& state) { // Build a projector for the expressions. 
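// --- Aside (illustrative, not part of this patch) ---
// TimedEvaluate above appears to pump generated record batches through a projector or
// filter inside the benchmark loop. A hedged, generic sketch of such a harness; the
// Generate()/Evaluate() members are assumptions, not the real
// gandiva/tests/timed_evaluate.h interface:
#include <benchmark/benchmark.h>

#include <algorithm>
#include <cstdint>

template <typename Evaluator, typename Generator>
void TimedEvaluateSketch(Evaluator& evaluator, Generator& generator, int64_t num_records,
                         int64_t batch_size, benchmark::State& state) {
  for (auto _ : state) {
    for (int64_t done = 0; done < num_records; done += batch_size) {
      auto batch = generator.Generate(std::min(batch_size, num_records - done));
      evaluator.Evaluate(batch);  // hypothetical: project or filter one batch
    }
  }
}
// --- End aside ---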
std::shared_ptr projector; - ASSERT_OK(Projector::Make(schema, {expr}, &projector)); + ASSERT_OK(Projector::Make(schema, {expr}, TestConfiguration(), &projector)); FastUtf8DataGenerator data_generator(250); ProjectEvaluator evaluator(projector); @@ -280,6 +277,119 @@ static void TimedTestInExpr(benchmark::State& state) { ASSERT_OK(status); } +static void DoDecimalAdd3(benchmark::State& state, int32_t precision, int32_t scale, + bool large = false) { + // schema for input fields + auto decimal_type = std::make_shared(precision, scale); + auto field0 = field("f0", decimal_type); + auto field1 = field("f1", decimal_type); + auto field2 = field("f2", decimal_type); + auto schema = arrow::schema({field0, field1, field2}); + + Decimal128TypePtr add2_type; + auto status = DecimalTypeUtil::GetResultType(DecimalTypeUtil::kOpAdd, + {decimal_type, decimal_type}, &add2_type); + + Decimal128TypePtr output_type; + status = DecimalTypeUtil::GetResultType(DecimalTypeUtil::kOpAdd, + {add2_type, decimal_type}, &output_type); + + // output field + auto field_sum = field("add", output_type); + + // Build expression + auto part_sum = TreeExprBuilder::MakeFunction( + "add", {TreeExprBuilder::MakeField(field1), TreeExprBuilder::MakeField(field2)}, + add2_type); + auto sum = TreeExprBuilder::MakeFunction( + "add", {TreeExprBuilder::MakeField(field0), part_sum}, output_type); + + auto sum_expr = TreeExprBuilder::MakeExpression(sum, field_sum); + + std::shared_ptr projector; + status = Projector::Make(schema, {sum_expr}, TestConfiguration(), &projector); + EXPECT_TRUE(status.ok()); + + Decimal128DataGenerator data_generator(large); + ProjectEvaluator evaluator(projector); + + status = TimedEvaluate( + schema, evaluator, data_generator, arrow::default_memory_pool(), 1 * MILLION, + 16 * THOUSAND, state); + ASSERT_OK(status); +} + +static void DoDecimalAdd2(benchmark::State& state, int32_t precision, int32_t scale, + bool large = false) { + // schema for input fields + auto decimal_type = std::make_shared(precision, scale); + auto field0 = field("f0", decimal_type); + auto field1 = field("f1", decimal_type); + auto schema = arrow::schema({field0, field1}); + + Decimal128TypePtr output_type; + auto status = DecimalTypeUtil::GetResultType( + DecimalTypeUtil::kOpAdd, {decimal_type, decimal_type}, &output_type); + + // output field + auto field_sum = field("add", output_type); + + // Build expression + auto sum = TreeExprBuilder::MakeExpression("add", {field0, field1}, field_sum); + + std::shared_ptr projector; + status = Projector::Make(schema, {sum}, TestConfiguration(), &projector); + EXPECT_TRUE(status.ok()); + + Decimal128DataGenerator data_generator(large); + ProjectEvaluator evaluator(projector); + + status = TimedEvaluate( + schema, evaluator, data_generator, arrow::default_memory_pool(), 1 * MILLION, + 16 * THOUSAND, state); + ASSERT_OK(status); +} + +static void DecimalAdd2Fast(benchmark::State& state) { + // use lesser precision to test the fast-path + DoDecimalAdd2(state, DecimalTypeUtil::kMaxPrecision - 6, 18); +} + +static void DecimalAdd2LeadingZeroes(benchmark::State& state) { + // use max precision to test the large-integer-path + DoDecimalAdd2(state, DecimalTypeUtil::kMaxPrecision, 6); +} + +static void DecimalAdd2LeadingZeroesWithDiv(benchmark::State& state) { + // use max precision to test the large-integer-path + DoDecimalAdd2(state, DecimalTypeUtil::kMaxPrecision, 18); +} + +static void DecimalAdd2Large(benchmark::State& state) { + // use max precision to test the large-integer-path + 
DoDecimalAdd2(state, DecimalTypeUtil::kMaxPrecision, 18, true); +} + +static void DecimalAdd3Fast(benchmark::State& state) { + // use lesser precision to test the fast-path + DoDecimalAdd3(state, DecimalTypeUtil::kMaxPrecision - 6, 18); +} + +static void DecimalAdd3LeadingZeroes(benchmark::State& state) { + // use max precision to test the large-integer-path + DoDecimalAdd3(state, DecimalTypeUtil::kMaxPrecision, 6); +} + +static void DecimalAdd3LeadingZeroesWithDiv(benchmark::State& state) { + // use max precision to test the large-integer-path + DoDecimalAdd3(state, DecimalTypeUtil::kMaxPrecision, 18); +} + +static void DecimalAdd3Large(benchmark::State& state) { + // use max precision to test the large-integer-path + DoDecimalAdd3(state, DecimalTypeUtil::kMaxPrecision, 18, true); +} + BENCHMARK(TimedTestAdd3)->MinTime(1.0)->Unit(benchmark::kMicrosecond); BENCHMARK(TimedTestBigNested)->MinTime(1.0)->Unit(benchmark::kMicrosecond); BENCHMARK(TimedTestBigNested)->MinTime(1.0)->Unit(benchmark::kMicrosecond); @@ -289,5 +399,13 @@ BENCHMARK(TimedTestFilterLike)->MinTime(1.0)->Unit(benchmark::kMicrosecond); BENCHMARK(TimedTestAllocs)->MinTime(1.0)->Unit(benchmark::kMicrosecond); BENCHMARK(TimedTestMultiOr)->MinTime(1.0)->Unit(benchmark::kMicrosecond); BENCHMARK(TimedTestInExpr)->MinTime(1.0)->Unit(benchmark::kMicrosecond); +BENCHMARK(DecimalAdd2Fast)->MinTime(1.0)->Unit(benchmark::kMicrosecond); +BENCHMARK(DecimalAdd2LeadingZeroes)->MinTime(1.0)->Unit(benchmark::kMicrosecond); +BENCHMARK(DecimalAdd2LeadingZeroesWithDiv)->MinTime(1.0)->Unit(benchmark::kMicrosecond); +BENCHMARK(DecimalAdd2Large)->MinTime(1.0)->Unit(benchmark::kMicrosecond); +BENCHMARK(DecimalAdd3Fast)->MinTime(1.0)->Unit(benchmark::kMicrosecond); +BENCHMARK(DecimalAdd3LeadingZeroes)->MinTime(1.0)->Unit(benchmark::kMicrosecond); +BENCHMARK(DecimalAdd3LeadingZeroesWithDiv)->MinTime(1.0)->Unit(benchmark::kMicrosecond); +BENCHMARK(DecimalAdd3Large)->MinTime(1.0)->Unit(benchmark::kMicrosecond); } // namespace gandiva diff --git a/cpp/src/gandiva/tests/null_validity_test.cc b/cpp/src/gandiva/tests/null_validity_test.cc index 06cfdc08ba906..0374b68d46288 100644 --- a/cpp/src/gandiva/tests/null_validity_test.cc +++ b/cpp/src/gandiva/tests/null_validity_test.cc @@ -60,7 +60,7 @@ TEST_F(TestNullValidity, TestFunc) { auto condition = TreeExprBuilder::MakeCondition(less_than_10); std::shared_ptr filter; - Status status = Filter::Make(schema, condition, &filter); + auto status = Filter::Make(schema, condition, TestConfiguration(), &filter); EXPECT_TRUE(status.ok()); // Create a row-batch with some sample data @@ -111,7 +111,7 @@ TEST_F(TestNullValidity, TestIfElse) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = Projector::Make(schema, {expr}, &projector); + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.ok()); // Create a row-batch with some sample data @@ -148,7 +148,7 @@ TEST_F(TestNullValidity, TestUtf8) { // Build a projector for the expressions. 
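// --- Aside (illustrative, not part of this patch) ---
// The decimal benchmark variants above choose precisions to steer between code paths:
// results that stay below the 38-digit maximum ("Fast") can never overflow 128 bits,
// while max-precision results need overflow handling and rescaling ("LeadingZeroes",
// "Large"). The core of that extra cost is checked arithmetic, sketched here on
// int64_t with a GCC/Clang builtin:
#include <cstdint>

// Returns true and stores a + b in *out when the sum fits; false on overflow.
static bool AddWithOverflowCheck(int64_t a, int64_t b, int64_t* out) {
#if defined(__GNUC__) || defined(__clang__)
  return !__builtin_add_overflow(a, b, out);
#else
  if ((b > 0 && a > INT64_MAX - b) || (b < 0 && a < INT64_MIN - b)) return false;
  *out = a + b;
  return true;
#endif
}
// --- End aside ---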
std::shared_ptr projector; - Status status = Projector::Make(schema, {expr}, &projector); + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.ok()) << status.message(); // Create a row-batch with some sample data diff --git a/cpp/src/gandiva/tests/projector_build_validation_test.cc b/cpp/src/gandiva/tests/projector_build_validation_test.cc index ddcb729b3bfee..6c4eef53ded68 100644 --- a/cpp/src/gandiva/tests/projector_build_validation_test.cc +++ b/cpp/src/gandiva/tests/projector_build_validation_test.cc @@ -50,7 +50,7 @@ TEST_F(TestProjector, TestNonExistentFunction) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = Projector::Make(schema, {lt_expr}, &projector); + auto status = Projector::Make(schema, {lt_expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.IsExpressionValidationError()); std::string expected_error = "Function bool non_existent_function(float, float) not supported yet."; @@ -71,7 +71,7 @@ TEST_F(TestProjector, TestNotMatchingDataType) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = Projector::Make(schema, {lt_expr}, &projector); + auto status = Projector::Make(schema, {lt_expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.IsExpressionValidationError()); std::string expected_error = "Return type of root node float does not match that of expression bool"; @@ -92,7 +92,7 @@ TEST_F(TestProjector, TestNotSupportedDataType) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = Projector::Make(schema, {lt_expr}, &projector); + auto status = Projector::Make(schema, {lt_expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.IsExpressionValidationError()); std::string expected_error = "Field f0 has unsupported data type list"; EXPECT_TRUE(status.message().find(expected_error) != std::string::npos); @@ -113,7 +113,7 @@ TEST_F(TestProjector, TestIncorrectSchemaMissingField) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = Projector::Make(schema, {lt_expr}, &projector); + auto status = Projector::Make(schema, {lt_expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.IsExpressionValidationError()); std::string expected_error = "Field f2 not in schema"; EXPECT_TRUE(status.message().find(expected_error) != std::string::npos); @@ -135,7 +135,7 @@ TEST_F(TestProjector, TestIncorrectSchemaTypeNotMatching) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = Projector::Make(schema, {lt_expr}, &projector); + auto status = Projector::Make(schema, {lt_expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.IsExpressionValidationError()); std::string expected_error = "Field definition in schema f2: int32 different from field in expression f2: float"; @@ -166,7 +166,7 @@ TEST_F(TestProjector, TestIfNotSupportedFunction) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = Projector::Make(schema, {expr}, &projector); + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.IsExpressionValidationError()); } @@ -189,10 +189,8 @@ TEST_F(TestProjector, TestIfNotMatchingReturnType) { // Build a projector for the expressions. 
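// --- Aside (illustrative, not part of this patch) ---
// The validation tests above assert on the status kind (IsExpressionValidationError)
// and, where a message is still checked, on a substring of status.message(). A toy
// Status showing the shape of that API; the real arrow/gandiva Status is richer:
#include <string>
#include <utility>

class StatusSketch {
 public:
  static StatusSketch OK() { return StatusSketch("", ""); }
  static StatusSketch ExpressionValidationError(std::string msg) {
    return StatusSketch("ExpressionValidationError", std::move(msg));
  }
  bool ok() const { return code_.empty(); }
  bool IsExpressionValidationError() const { return code_ == "ExpressionValidationError"; }
  const std::string& message() const { return msg_; }

 private:
  StatusSketch(std::string code, std::string msg)
      : code_(std::move(code)), msg_(std::move(msg)) {}
  std::string code_, msg_;
};
// usage shape: EXPECT_TRUE(status.IsExpressionValidationError());
//              EXPECT_TRUE(status.message().find(expected_error) != std::string::npos);
// --- End aside ---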
std::shared_ptr projector; - Status status = Projector::Make(schema, {expr}, &projector); + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.IsExpressionValidationError()); - std::string expected_error = "Return type of if bool and then int32 not matching."; - EXPECT_TRUE(status.message().find(expected_error) != std::string::npos); } TEST_F(TestProjector, TestElseNotMatchingReturnType) { @@ -216,10 +214,8 @@ TEST_F(TestProjector, TestElseNotMatchingReturnType) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = Projector::Make(schema, {expr}, &projector); + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.IsExpressionValidationError()); - std::string expected_error = "Return type of if int32 and else bool not matching."; - EXPECT_TRUE(status.message().find(expected_error) != std::string::npos); } TEST_F(TestProjector, TestElseNotSupportedType) { @@ -243,10 +239,9 @@ TEST_F(TestProjector, TestElseNotSupportedType) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = Projector::Make(schema, {expr}, &projector); + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.IsExpressionValidationError()); - std::string expected_error = "Field c has unsupported data type list"; - EXPECT_TRUE(status.message().find(expected_error) != std::string::npos); + EXPECT_EQ(status.code(), StatusCode::ExpressionValidationError); } TEST_F(TestProjector, TestAndMinChildren) { @@ -264,10 +259,8 @@ TEST_F(TestProjector, TestAndMinChildren) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = Projector::Make(schema, {expr}, &projector); + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.IsExpressionValidationError()); - std::string expected_error = "Boolean expression has 1 children, expected atleast two"; - EXPECT_TRUE(status.message().find(expected_error) != std::string::npos); } TEST_F(TestProjector, TestAndBooleanArgType) { @@ -287,12 +280,8 @@ TEST_F(TestProjector, TestAndBooleanArgType) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = Projector::Make(schema, {expr}, &projector); + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.IsExpressionValidationError()); - std::string expected_error = - "Boolean expression has a child with return type int32, expected return type " - "boolean"; - EXPECT_TRUE(status.message().find(expected_error) != std::string::npos); } } // namespace gandiva diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc index becaf8f1ba3d7..ba0e63292f4ab 100644 --- a/cpp/src/gandiva/tests/projector_test.cc +++ b/cpp/src/gandiva/tests/projector_test.cc @@ -50,31 +50,55 @@ TEST_F(TestProjector, TestProjectCache) { auto sub_expr = TreeExprBuilder::MakeExpression("subtract", {field0, field1}, field_sub); + auto configuration = TestConfiguration(); + std::shared_ptr projector; - Status status = Projector::Make(schema, {sum_expr, sub_expr}, &projector); - EXPECT_TRUE(status.ok()); + auto status = Projector::Make(schema, {sum_expr, sub_expr}, configuration, &projector); + ASSERT_OK(status); // everything is same, should return the same projector. 
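// --- Aside (illustrative, not part of this patch) ---
// The cache assertions in this test compare std::shared_ptr values directly with
// EXPECT_EQ/EXPECT_NE rather than comparing raw get() pointers. shared_ptr's
// operator== compares the stored pointers, so this still checks identity, not deep
// equality. A tiny demonstration:
#include <cassert>
#include <memory>

int main() {
  auto a = std::make_shared<int>(42);
  auto b = a;                           // same object
  auto c = std::make_shared<int>(42);   // equal value, different object
  assert(a == b);  // identity holds
  assert(a != c);  // distinct objects compare unequal even though *a == *c
  return 0;
}
// --- End aside ---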
auto schema_same = arrow::schema({field0, field1}); std::shared_ptr cached_projector; - status = Projector::Make(schema_same, {sum_expr, sub_expr}, &cached_projector); - EXPECT_TRUE(status.ok()); - EXPECT_TRUE(cached_projector.get() == projector.get()); + status = Projector::Make(schema_same, {sum_expr, sub_expr}, configuration, + &cached_projector); + ASSERT_OK(status); + EXPECT_EQ(cached_projector, projector); // schema is different should return a new projector. auto field2 = field("f2", int32()); auto different_schema = arrow::schema({field0, field1, field2}); std::shared_ptr should_be_new_projector; - status = - Projector::Make(different_schema, {sum_expr, sub_expr}, &should_be_new_projector); - EXPECT_TRUE(status.ok()); - EXPECT_TRUE(cached_projector.get() != should_be_new_projector.get()); + status = Projector::Make(different_schema, {sum_expr, sub_expr}, configuration, + &should_be_new_projector); + ASSERT_OK(status); + EXPECT_NE(cached_projector, should_be_new_projector); // expression list is different should return a new projector. std::shared_ptr should_be_new_projector1; - status = Projector::Make(schema, {sum_expr}, &should_be_new_projector1); - EXPECT_TRUE(status.ok()); - EXPECT_TRUE(cached_projector.get() != should_be_new_projector1.get()); + status = Projector::Make(schema, {sum_expr}, configuration, &should_be_new_projector1); + ASSERT_OK(status); + EXPECT_NE(cached_projector, should_be_new_projector1); + + // another instance of the same configuration, should return the same projector. + status = Projector::Make(schema, {sum_expr, sub_expr}, TestConfiguration(), + &cached_projector); + ASSERT_OK(status); + EXPECT_EQ(cached_projector, projector); + + // if configuration is different, should return a new projector. + + // build a new path by replacing the first '/' with '//' + std::string alt_path(GANDIVA_BYTE_COMPILE_FILE_PATH); + auto pos = alt_path.find('/', 0); + EXPECT_NE(pos, std::string::npos); + alt_path.replace(pos, 1, "//"); + auto other_configuration = + ConfigurationBuilder().set_byte_code_file_path(alt_path).build(); + std::shared_ptr should_be_new_projector2; + status = Projector::Make(schema, {sum_expr, sub_expr}, other_configuration, + &should_be_new_projector2); + ASSERT_OK(status); + EXPECT_NE(projector, should_be_new_projector2); } TEST_F(TestProjector, TestProjectCacheFieldNames) { @@ -90,12 +114,13 @@ TEST_F(TestProjector, TestProjectCacheFieldNames) { auto sum_expr_01 = TreeExprBuilder::MakeExpression("add", {field0, field1}, sum_01); std::shared_ptr projector_01; - Status status = Projector::Make(schema, {sum_expr_01}, &projector_01); + auto status = + Projector::Make(schema, {sum_expr_01}, TestConfiguration(), &projector_01); EXPECT_TRUE(status.ok()); auto sum_expr_12 = TreeExprBuilder::MakeExpression("add", {field1, field2}, sum_12); std::shared_ptr projector_12; - status = Projector::Make(schema, {sum_expr_12}, &projector_12); + status = Projector::Make(schema, {sum_expr_12}, TestConfiguration(), &projector_12); EXPECT_TRUE(status.ok()); // add(f0, f1) != add(f1, f2) @@ -111,14 +136,16 @@ TEST_F(TestProjector, TestProjectCacheDouble) { auto literal0 = TreeExprBuilder::MakeLiteral(d0); auto expr0 = TreeExprBuilder::MakeExpression(literal0, res); + auto configuration = TestConfiguration(); + std::shared_ptr projector0; - auto status = Projector::Make(schema, {expr0}, &projector0); + auto status = Projector::Make(schema, {expr0}, configuration, &projector0); EXPECT_TRUE(status.ok()) << status.message(); auto literal1 = 
TreeExprBuilder::MakeLiteral(d1); auto expr1 = TreeExprBuilder::MakeExpression(literal1, res); std::shared_ptr projector1; - status = Projector::Make(schema, {expr1}, &projector1); + status = Projector::Make(schema, {expr1}, configuration, &projector1); EXPECT_TRUE(status.ok()) << status.message(); EXPECT_TRUE(projector0.get() != projector1.get()); @@ -134,13 +161,13 @@ TEST_F(TestProjector, TestProjectCacheFloat) { auto literal0 = TreeExprBuilder::MakeLiteral(f0); auto expr0 = TreeExprBuilder::MakeExpression(literal0, res); std::shared_ptr projector0; - auto status = Projector::Make(schema, {expr0}, &projector0); + auto status = Projector::Make(schema, {expr0}, TestConfiguration(), &projector0); EXPECT_TRUE(status.ok()) << status.message(); auto literal1 = TreeExprBuilder::MakeLiteral(f1); auto expr1 = TreeExprBuilder::MakeExpression(literal1, res); std::shared_ptr projector1; - status = Projector::Make(schema, {expr1}, &projector1); + status = Projector::Make(schema, {expr1}, TestConfiguration(), &projector1); EXPECT_TRUE(status.ok()) << status.message(); EXPECT_TRUE(projector0.get() != projector1.get()); @@ -162,50 +189,8 @@ TEST_F(TestProjector, TestIntSumSub) { TreeExprBuilder::MakeExpression("subtract", {field0, field1}, field_sub); std::shared_ptr projector; - Status status = Projector::Make(schema, {sum_expr, sub_expr}, &projector); - EXPECT_TRUE(status.ok()); - - // Create a row-batch with some sample data - int num_records = 4; - auto array0 = MakeArrowArrayInt32({1, 2, 3, 4}, {true, true, true, false}); - auto array1 = MakeArrowArrayInt32({11, 13, 15, 17}, {true, true, false, true}); - // expected output - auto exp_sum = MakeArrowArrayInt32({12, 15, 0, 0}, {true, true, false, false}); - auto exp_sub = MakeArrowArrayInt32({-10, -11, 0, 0}, {true, true, false, false}); - - // prepare input record batch - auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1}); - - // Evaluate expression - arrow::ArrayVector outputs; - status = projector->Evaluate(*in_batch, pool_, &outputs); - EXPECT_TRUE(status.ok()); - - // Validate results - EXPECT_ARROW_ARRAY_EQUALS(exp_sum, outputs.at(0)); - EXPECT_ARROW_ARRAY_EQUALS(exp_sub, outputs.at(1)); -} - -TEST_F(TestProjector, TestIntSumSubCustomConfig) { - // schema for input fields - auto field0 = field("f0", int32()); - auto field1 = field("f2", int32()); - auto schema = arrow::schema({field0, field1}); - - // output fields - auto field_sum = field("add", int32()); - auto field_sub = field("subtract", int32()); - - // Build expression - auto sum_expr = TreeExprBuilder::MakeExpression("add", {field0, field1}, field_sum); - auto sub_expr = - TreeExprBuilder::MakeExpression("subtract", {field0, field1}, field_sub); - - std::shared_ptr projector; - ConfigurationBuilder config_builder; - std::shared_ptr config = config_builder.build(); - - Status status = Projector::Make(schema, {sum_expr, sub_expr}, config, &projector); + auto status = + Projector::Make(schema, {sum_expr, sub_expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.ok()); // Create a row-batch with some sample data @@ -257,15 +242,17 @@ static void TestArithmeticOpsForType(arrow::MemoryPool* pool) { auto lt_expr = TreeExprBuilder::MakeExpression("less_than", {field0, field1}, field_lt); std::shared_ptr projector; - Status status = Projector::Make( - schema, {sum_expr, sub_expr, mul_expr, div_expr, eq_expr, lt_expr}, &projector); + auto status = + Projector::Make(schema, {sum_expr, sub_expr, mul_expr, div_expr, eq_expr, lt_expr}, + TestConfiguration(), 
&projector); EXPECT_TRUE(status.ok()); // Create a row-batch with some sample data - int num_records = 4; - std::vector input0 = {1, 2, 53, 84}; - std::vector input1 = {10, 15, 23, 84}; - std::vector validity = {true, true, true, true}; + int num_records = 12; + std::vector input0 = {1, 2, 53, 84, 5, 15, 0, 1, 52, 83, 4, 120}; + std::vector input1 = {10, 15, 23, 84, 4, 51, 68, 9, 16, 18, 19, 37}; + std::vector validity = {true, true, true, true, true, true, + true, true, true, true, true, true}; auto array0 = MakeArrowArray(input0, validity); auto array1 = MakeArrowArray(input1, validity); @@ -344,9 +331,9 @@ TEST_F(TestProjector, TestExtendedMath) { TreeExprBuilder::MakeExpression("power", {field0, field1}, field_power); std::shared_ptr projector; - Status status = Projector::Make( + auto status = Projector::Make( schema, {cbrt_expr, exp_expr, log_expr, log10_expr, logb_expr, power_expr}, - &projector); + TestConfiguration(), &projector); EXPECT_TRUE(status.ok()); // Create a row-batch with some sample data @@ -412,7 +399,7 @@ TEST_F(TestProjector, TestFloatLessThan) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = Projector::Make(schema, {lt_expr}, &projector); + auto status = Projector::Make(schema, {lt_expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.ok()); // Create a row-batch with some sample data @@ -447,7 +434,7 @@ TEST_F(TestProjector, TestIsNotNull) { // Build a projector for the expressions. std::shared_ptr projector; - Status status = Projector::Make(schema, {myexpr}, &projector); + auto status = Projector::Make(schema, {myexpr}, TestConfiguration(), &projector); EXPECT_TRUE(status.ok()); // Create a row-batch with some sample data @@ -480,7 +467,7 @@ TEST_F(TestProjector, TestZeroCopy) { auto cast_expr = TreeExprBuilder::MakeExpression("castFLOAT4", {field0}, res); std::shared_ptr projector; - Status status = Projector::Make(schema, {cast_expr}, &projector); + auto status = Projector::Make(schema, {cast_expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.ok()); // Create a row-batch with some sample data @@ -493,14 +480,15 @@ TEST_F(TestProjector, TestZeroCopy) { // allocate output buffers int64_t bitmap_sz = arrow::BitUtil::BytesForBits(num_records); - std::unique_ptr bitmap(new uint8_t[bitmap_sz]); + int64_t bitmap_capacity = arrow::BitUtil::RoundUpToMultipleOf64(bitmap_sz); + std::vector bitmap(bitmap_capacity); std::shared_ptr bitmap_buf = - std::make_shared(bitmap.get(), bitmap_sz); + std::make_shared(&bitmap[0], bitmap_capacity); int64_t data_sz = sizeof(float) * num_records; - std::unique_ptr data(new uint8_t[data_sz]); + std::vector data(bitmap_capacity); std::shared_ptr data_buf = - std::make_shared(data.get(), data_sz); + std::make_shared(&data[0], data_sz); auto array_data = arrow::ArrayData::Make(float32(), num_records, {bitmap_buf, data_buf}); @@ -526,7 +514,7 @@ TEST_F(TestProjector, TestZeroCopyNegative) { auto cast_expr = TreeExprBuilder::MakeExpression("castFLOAT4", {field0}, res); std::shared_ptr projector; - Status status = Projector::Make(schema, {cast_expr}, &projector); + auto status = Projector::Make(schema, {cast_expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.ok()); // Create a row-batch with some sample data @@ -596,7 +584,7 @@ TEST_F(TestProjector, TestDivideZero) { auto div_expr = TreeExprBuilder::MakeExpression("divide", {field0, field1}, field_div); std::shared_ptr projector; - Status status = Projector::Make(schema, {div_expr}, &projector); + auto status = 
+      Projector::Make(schema, {div_expr}, TestConfiguration(), &projector);
   EXPECT_TRUE(status.ok()) << status.message();
 
   // Create a row-batch with some sample data
@@ -645,7 +633,7 @@ TEST_F(TestProjector, TestModZero) {
   auto mod_expr = TreeExprBuilder::MakeExpression("mod", {field0, field1}, field_div);
 
   std::shared_ptr<Projector> projector;
-  Status status = Projector::Make(schema, {mod_expr}, &projector);
+  auto status = Projector::Make(schema, {mod_expr}, TestConfiguration(), &projector);
   EXPECT_TRUE(status.ok()) << status.message();
 
   // Create a row-batch with some sample data
diff --git a/cpp/src/gandiva/tests/test_util.h b/cpp/src/gandiva/tests/test_util.h
index d24448727bd83..0e0e27a0c9aa4 100644
--- a/cpp/src/gandiva/tests/test_util.h
+++ b/cpp/src/gandiva/tests/test_util.h
@@ -21,6 +21,7 @@
 #include
 #include "arrow/test-util.h"
 #include "gandiva/arrow.h"
+#include "gandiva/configuration.h"
 
 #ifndef GANDIVA_TEST_UTIL_H
 #define GANDIVA_TEST_UTIL_H
@@ -46,6 +47,14 @@ static ArrayPtr MakeArrowArray(std::vector<C_TYPE> values) {
   return out;
 }
 
+template <typename TYPE, typename C_TYPE>
+static ArrayPtr MakeArrowArray(const std::shared_ptr<arrow::DataType>& type,
+                               std::vector<C_TYPE> values, std::vector<bool> validity) {
+  ArrayPtr out;
+  arrow::ArrayFromVector<TYPE, C_TYPE>(type, validity, values, &out);
+  return out;
+}
+
 template <typename TYPE, typename C_TYPE>
 static ArrayPtr MakeArrowTypeArray(const std::shared_ptr<arrow::DataType>& type,
                                    const std::vector<C_TYPE>& values,
@@ -68,11 +77,22 @@ static ArrayPtr MakeArrowTypeArray(const std::shared_ptr<arrow::DataType>& type,
 #define MakeArrowArrayFloat64 MakeArrowArray<arrow::DoubleType, double>
 #define MakeArrowArrayUtf8 MakeArrowArray<arrow::StringType, std::string>
 #define MakeArrowArrayBinary MakeArrowArray<arrow::BinaryType, std::string>
+#define MakeArrowArrayDecimal MakeArrowArray<arrow::Decimal128Type, arrow::Decimal128>
 
 #define EXPECT_ARROW_ARRAY_EQUALS(a, b)                                \
   EXPECT_TRUE((a)->Equals(b)) << "expected array: " << (a)->ToString() \
                               << " actual array: " << (b)->ToString();
 
+#define EXPECT_ARROW_TYPE_EQUALS(a, b)                                \
+  EXPECT_TRUE((a)->Equals(b)) << "expected type: " << (a)->ToString() \
+                              << " actual type: " << (b)->ToString();
+
+std::shared_ptr<Configuration> TestConfiguration() {
+  auto builder = ConfigurationBuilder();
+  builder.set_byte_code_file_path(GANDIVA_BYTE_COMPILE_FILE_PATH);
+  return builder.build();
+}
+
 }  // namespace gandiva
 
 #endif  // GANDIVA_TEST_UTIL_H
diff --git a/cpp/src/gandiva/tests/timed_evaluate.h b/cpp/src/gandiva/tests/timed_evaluate.h
index dab47c2f218be..9db7d88d2a226 100644
--- a/cpp/src/gandiva/tests/timed_evaluate.h
+++ b/cpp/src/gandiva/tests/timed_evaluate.h
@@ -100,7 +100,9 @@ Status TimedEvaluate(SchemaPtr schema, BaseEvaluator& evaluator,
   for (int col = 0; col < num_fields; col++) {
     std::vector<C_TYPE> data = GenerateData<C_TYPE>(batch_size, data_generator);
     std::vector<bool> validity(batch_size, true);
-    ArrayPtr col_data = MakeArrowArray<TYPE, C_TYPE>(data, validity);
+    ArrayPtr col_data =
+        MakeArrowArray<TYPE, C_TYPE>(schema->field(col)->type(), data, validity);
+
     columns.push_back(col_data);
   }
 
diff --git a/cpp/src/gandiva/tests/utf8_test.cc b/cpp/src/gandiva/tests/utf8_test.cc
index 8b09b72f32d03..925ceea836280 100644
--- a/cpp/src/gandiva/tests/utf8_test.cc
+++ b/cpp/src/gandiva/tests/utf8_test.cc
@@ -67,7 +67,8 @@ TEST_F(TestUtf8, TestSimple) {
   // Build a projector for the expressions.
   std::shared_ptr<Projector> projector;
-  Status status = Projector::Make(schema, {expr_a, expr_b, expr_c}, &projector);
+  auto status =
+      Projector::Make(schema, {expr_a, expr_b, expr_c}, TestConfiguration(), &projector);
   EXPECT_TRUE(status.ok()) << status.message();
 
   // Create a row-batch with some sample data
@@ -113,7 +114,7 @@ TEST_F(TestUtf8, TestLiteral) {
   // Build a projector for the expressions.
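[Editor's aside, not part of the patch: ConfigurationBuilder is the seam these tests use to vary the projector cache key. A configuration with a different byte-code path, as in TestProjectCache above, is built the same chained way; the path below is a placeholder, not a real file.]

  auto other_configuration =
      ConfigurationBuilder().set_byte_code_file_path("/some/other/path.bc").build();
  // Passing other_configuration to Projector::Make() is expected to bypass
  // the projector cached under TestConfiguration().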
   std::shared_ptr<Projector> projector;
-  Status status = Projector::Make(schema, {expr}, &projector);
+  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
   EXPECT_TRUE(status.ok()) << status.message();
 
   // Create a row-batch with some sample data
@@ -155,7 +156,7 @@ TEST_F(TestUtf8, TestNullLiteral) {
   // Build a projector for the expressions.
   std::shared_ptr<Projector> projector;
-  Status status = Projector::Make(schema, {expr}, &projector);
+  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
   EXPECT_TRUE(status.ok()) << status.message();
 
   // Create a row-batch with some sample data
@@ -197,7 +198,7 @@ TEST_F(TestUtf8, TestLike) {
   // Build a projector for the expressions.
   std::shared_ptr<Projector> projector;
-  Status status = Projector::Make(schema, {expr}, &projector);
+  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
   EXPECT_TRUE(status.ok()) << status.message();
 
   // Create a row-batch with some sample data
@@ -245,7 +246,7 @@ TEST_F(TestUtf8, TestBeginsEnds) {
   // Build a projector for the expressions.
   std::shared_ptr<Projector> projector;
-  Status status = Projector::Make(schema, {expr1, expr2}, &projector);
+  auto status = Projector::Make(schema, {expr1, expr2}, TestConfiguration(), &projector);
   EXPECT_TRUE(status.ok()) << status.message();
 
   // Create a row-batch with some sample data
@@ -291,7 +292,7 @@ TEST_F(TestUtf8, TestInternalAllocs) {
   // Build a projector for the expressions.
   std::shared_ptr<Projector> projector;
-  Status status = Projector::Make(schema, {expr}, &projector);
+  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
   EXPECT_TRUE(status.ok()) << status.message();
 
   // Create a row-batch with some sample data
@@ -334,7 +335,7 @@ TEST_F(TestUtf8, TestCastDate) {
   // Build a projector for the expressions.
   std::shared_ptr<Projector> projector;
-  Status status = Projector::Make(schema, {expr}, &projector);
+  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
   EXPECT_TRUE(status.ok()) << status.message();
 
   // Create a row-batch with some sample data
@@ -389,7 +390,7 @@ TEST_F(TestUtf8, TestToDateNoError) {
   // Build a projector for the expressions.
   std::shared_ptr<Projector> projector;
-  Status status = Projector::Make(schema, {expr}, &projector);
+  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
   EXPECT_TRUE(status.ok()) << status.message();
 
   // Create a row-batch with some sample data
@@ -444,7 +445,7 @@ TEST_F(TestUtf8, TestToDateError) {
   // Build a projector for the expressions.
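[Editor's aside, not part of the patch: each of these utf8 tests repeats the same build-then-evaluate pattern, condensed below; pool_ is the fixture's arrow::MemoryPool and the remaining names stand in for the per-test fixtures.]

  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  // Evaluation consumes a record batch and produces one output array per
  // expression, allocated from the supplied memory pool.
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);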
   std::shared_ptr<Projector> projector;
-  Status status = Projector::Make(schema, {expr}, &projector);
+  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
   EXPECT_TRUE(status.ok()) << status.message();
 
   // Create a row-batch with some sample data
diff --git a/cpp/src/gandiva/to_date_holder.cc b/cpp/src/gandiva/to_date_holder.cc
index 9c8562280041d..824654f44a6e4 100644
--- a/cpp/src/gandiva/to_date_holder.cc
+++ b/cpp/src/gandiva/to_date_holder.cc
@@ -18,7 +18,7 @@
 #include
 #include
 
-#include "arrow/util/date.h"
+#include "arrow/vendored/datetime.h"
 
 #include "gandiva/date_utils.h"
 #include "gandiva/execution_context.h"
@@ -44,7 +44,7 @@ Status ToDateHolder::Make(const FunctionNode& node,
     return Status::Invalid(
         "'to_date' function requires a string literal as the second parameter");
   }
-  auto pattern = boost::get<std::string>(literal_pattern->holder());
+  auto pattern = literal_pattern->holder().get<std::string>();
   auto literal_suppress_errors =
       dynamic_cast<LiteralNode*>(node.children().at(2).get());
   if (literal_pattern == nullptr) {
@@ -57,15 +57,14 @@
     return Status::Invalid(
         "'to_date' function requires a int literal as the third parameter");
   }
-  auto suppress_errors = boost::get<int32_t>(literal_suppress_errors->holder());
+  auto suppress_errors = literal_suppress_errors->holder().get<int32_t>();
   return Make(pattern, suppress_errors, holder);
 }
 
 Status ToDateHolder::Make(const std::string& sql_pattern, int32_t suppress_errors,
                           std::shared_ptr<ToDateHolder>* holder) {
   std::shared_ptr<std::string> transformed_pattern;
-  Status status = DateUtils::ToInternalFormat(sql_pattern, &transformed_pattern);
-  ARROW_RETURN_NOT_OK(status);
+  ARROW_RETURN_NOT_OK(DateUtils::ToInternalFormat(sql_pattern, &transformed_pattern));
   auto lholder = std::shared_ptr<ToDateHolder>(
       new ToDateHolder(*(transformed_pattern.get()), suppress_errors));
   *holder = lholder;
@@ -82,21 +81,14 @@ int64_t ToDateHolder::operator()(ExecutionContext* context, const std::string& d
   // Issues
   // 1. processes date that do not match the format.
   // 2. does not process time in format +08:00 (or) id.
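[Editor's aside, not part of the patch: a small worked example of the conversion the replacement code below performs. internal::ParseTimestamp is assumed to yield seconds since the UNIX epoch, which operator() scales to milliseconds.]

  // "1986-12-01 00:00:00" parses to 533779200 seconds since the epoch; the
  // holder returns milliseconds, matching the 533779200000 asserted in
  // to_date_holder_test.cc below.
  int64_t seconds_since_epoch = 533779200;
  int64_t millis_since_epoch = seconds_since_epoch * 1000;  // 533779200000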
- struct tm result = {}; - char* ret = strptime(data.c_str(), pattern_.c_str(), &result); - if (ret == nullptr) { + int64_t seconds_since_epoch = 0; + if (!internal::ParseTimestamp(data.c_str(), pattern_.c_str(), true, + &seconds_since_epoch)) { return_error(context, data); return 0; } + *out_valid = true; - // ignore the time part - date::sys_seconds secs = date::sys_days(date::year(result.tm_year + 1900) / - (result.tm_mon + 1) / result.tm_mday); - int64_t seconds_since_epoch = secs.time_since_epoch().count(); - if (seconds_since_epoch == 0) { - return_error(context, data); - return 0; - } return seconds_since_epoch * 1000; } diff --git a/cpp/src/gandiva/to_date_holder.h b/cpp/src/gandiva/to_date_holder.h index 91133cc5269d8..c0c5afb8b31cd 100644 --- a/cpp/src/gandiva/to_date_holder.h +++ b/cpp/src/gandiva/to_date_holder.h @@ -27,11 +27,12 @@ #include "gandiva/execution_context.h" #include "gandiva/function_holder.h" #include "gandiva/node.h" +#include "gandiva/visibility.h" namespace gandiva { /// Function Holder for SQL 'to_date' -class ToDateHolder : public FunctionHolder { +class GANDIVA_EXPORT ToDateHolder : public FunctionHolder { public: ~ToDateHolder() override = default; diff --git a/cpp/src/gandiva/to_date_holder_test.cc b/cpp/src/gandiva/to_date_holder_test.cc index 2a207b2ad7742..0effffb0ddb7c 100644 --- a/cpp/src/gandiva/to_date_holder_test.cc +++ b/cpp/src/gandiva/to_date_holder_test.cc @@ -18,6 +18,8 @@ #include #include +#include "arrow/test-util.h" + #include "gandiva/execution_context.h" #include "gandiva/to_date_holder.h" #include "precompiled/epoch_time_point.h" @@ -37,57 +39,68 @@ class TestToDateHolder : public ::testing::Test { return FunctionNode("to_date_utf8_utf8_int32", {field, pattern_node, suppres_error_node}, arrow::int64()); } + + protected: + ExecutionContext execution_context_; }; TEST_F(TestToDateHolder, TestSimpleDateTime) { std::shared_ptr to_date_holder; + ASSERT_OK(ToDateHolder::Make("YYYY-MM-DD HH:MI:SS", 1, &to_date_holder)); - auto status = ToDateHolder::Make("YYYY-MM-DD HH:MI:SS", 1, &to_date_holder); - EXPECT_EQ(status.ok(), true) << status.message(); - ExecutionContext execution_context; auto& to_date = *to_date_holder; bool out_valid; int64_t millis_since_epoch = - to_date(&execution_context, "1986-12-01 01:01:01", true, &out_valid); + to_date(&execution_context_, "1986-12-01 01:01:01", true, &out_valid); EXPECT_EQ(millis_since_epoch, 533779200000); millis_since_epoch = - to_date(&execution_context, "1986-12-01 01:01:01.11", true, &out_valid); + to_date(&execution_context_, "1986-12-01 01:01:01.11", true, &out_valid); EXPECT_EQ(millis_since_epoch, 533779200000); millis_since_epoch = - to_date(&execution_context, "1986-12-01 01:01:01 +0800", true, &out_valid); + to_date(&execution_context_, "1986-12-01 01:01:01 +0800", true, &out_valid); EXPECT_EQ(millis_since_epoch, 533779200000); +#if 0 + // TODO : this fails parsing with date::parse and strptime on linux + millis_since_epoch = + to_date(&execution_context_, "1886-12-01 00:00:00", true, &out_valid); + EXPECT_EQ(out_valid, true); + EXPECT_EQ(millis_since_epoch, -2621894400000); +#endif + millis_since_epoch = - to_date(&execution_context, "1986-12-11 01:30:00", true, &out_valid); + to_date(&execution_context_, "1886-12-01 01:01:01", true, &out_valid); + EXPECT_EQ(millis_since_epoch, -2621894400000); + + millis_since_epoch = + to_date(&execution_context_, "1986-12-11 01:30:00", true, &out_valid); EXPECT_EQ(millis_since_epoch, 534643200000); } TEST_F(TestToDateHolder, TestSimpleDate) { 
std::shared_ptr to_date_holder; + ASSERT_OK(ToDateHolder::Make("YYYY-MM-DD", 1, &to_date_holder)); - auto status = ToDateHolder::Make("YYYY-MM-DD", 1, &to_date_holder); - EXPECT_EQ(status.ok(), true) << status.message(); - ExecutionContext execution_context; auto& to_date = *to_date_holder; bool out_valid; int64_t millis_since_epoch = - to_date(&execution_context, "1986-12-01", true, &out_valid); + to_date(&execution_context_, "1986-12-01", true, &out_valid); EXPECT_EQ(millis_since_epoch, 533779200000); - millis_since_epoch = to_date(&execution_context, "1986-12-1", true, &out_valid); + millis_since_epoch = to_date(&execution_context_, "1986-12-1", true, &out_valid); EXPECT_EQ(millis_since_epoch, 533779200000); - millis_since_epoch = to_date(&execution_context, "1886-12-1", true, &out_valid); + millis_since_epoch = to_date(&execution_context_, "1886-12-1", true, &out_valid); EXPECT_EQ(millis_since_epoch, -2621894400000); - millis_since_epoch = to_date(&execution_context, "2012-12-1", true, &out_valid); + millis_since_epoch = to_date(&execution_context_, "2012-12-1", true, &out_valid); EXPECT_EQ(millis_since_epoch, 1354320000000); // wrong month. should return 0 since we are suppresing errors. millis_since_epoch = - to_date(&execution_context, "1986-21-01 01:01:01 +0800", true, &out_valid); + to_date(&execution_context_, "1986-21-01 01:01:01 +0800", true, &out_valid); EXPECT_EQ(millis_since_epoch, 0); } @@ -96,22 +109,22 @@ TEST_F(TestToDateHolder, TestSimpleDateTimeError) { auto status = ToDateHolder::Make("YYYY-MM-DD HH:MI:SS", 0, &to_date_holder); EXPECT_EQ(status.ok(), true) << status.message(); - ExecutionContext execution_context; auto& to_date = *to_date_holder; bool out_valid; int64_t millis_since_epoch = - to_date(&execution_context, "1986-21-01 01:01:01 +0800", true, &out_valid); + to_date(&execution_context_, "1986-01-40 01:01:01 +0800", true, &out_valid); + EXPECT_EQ(0, millis_since_epoch); std::string expected_error = - "Error parsing value 1986-21-01 01:01:01 +0800 for given format"; - EXPECT_TRUE(execution_context.get_error().find(expected_error) != std::string::npos) + "Error parsing value 1986-01-40 01:01:01 +0800 for given format"; + EXPECT_TRUE(execution_context_.get_error().find(expected_error) != std::string::npos) << status.message(); // not valid should not return error - execution_context.Reset(); - millis_since_epoch = to_date(&execution_context, "nullptr", false, &out_valid); + execution_context_.Reset(); + millis_since_epoch = to_date(&execution_context_, "nullptr", false, &out_valid); EXPECT_EQ(millis_since_epoch, 0); - EXPECT_TRUE(execution_context.has_error() == false); + EXPECT_TRUE(execution_context_.has_error() == false); } TEST_F(TestToDateHolder, TestSimpleDateTimeMakeError) { diff --git a/cpp/src/gandiva/tree_expr_builder.cc b/cpp/src/gandiva/tree_expr_builder.cc index 86a2824075497..a63b700c2eeae 100644 --- a/cpp/src/gandiva/tree_expr_builder.cc +++ b/cpp/src/gandiva/tree_expr_builder.cc @@ -19,6 +19,7 @@ #include +#include "gandiva/decimal_type_util.h" #include "gandiva/gandiva_aliases.h" #include "gandiva/node.h" @@ -49,6 +50,11 @@ NodePtr TreeExprBuilder::MakeBinaryLiteral(const std::string& value) { return std::make_shared(arrow::binary(), LiteralHolder(value), false); } +NodePtr TreeExprBuilder::MakeDecimalLiteral(const DecimalScalar128& value) { + return std::make_shared(arrow::decimal(value.precision(), value.scale()), + LiteralHolder(value), false); +} + NodePtr TreeExprBuilder::MakeNull(DataTypePtr data_type) { static const std::string empty; @@ 
-92,6 +98,10 @@ NodePtr TreeExprBuilder::MakeNull(DataTypePtr data_type) { return std::make_shared(data_type, LiteralHolder((int64_t)0), true); case arrow::Type::TIMESTAMP: return std::make_shared(data_type, LiteralHolder((int64_t)0), true); + case arrow::Type::DECIMAL: { + DecimalScalar128 literal(0, 0); + return std::make_shared(data_type, LiteralHolder(literal), true); + } default: return nullptr; } diff --git a/cpp/src/gandiva/tree_expr_builder.h b/cpp/src/gandiva/tree_expr_builder.h index cd261c8bf978d..4b2789af04c04 100644 --- a/cpp/src/gandiva/tree_expr_builder.h +++ b/cpp/src/gandiva/tree_expr_builder.h @@ -23,13 +23,16 @@ #include #include +#include "arrow/type.h" #include "gandiva/condition.h" +#include "gandiva/decimal_scalar.h" #include "gandiva/expression.h" +#include "gandiva/visibility.h" namespace gandiva { /// \brief Tree Builder for a nested expression. -class TreeExprBuilder { +class GANDIVA_EXPORT TreeExprBuilder { public: /// \brief create a node on a literal. static NodePtr MakeLiteral(bool value); @@ -45,6 +48,7 @@ class TreeExprBuilder { static NodePtr MakeLiteral(double value); static NodePtr MakeStringLiteral(const std::string& value); static NodePtr MakeBinaryLiteral(const std::string& value); + static NodePtr MakeDecimalLiteral(const DecimalScalar128& value); /// \brief create a node on a null literal. /// returns null if data_type is null or if it's not a supported datatype. diff --git a/cpp/src/gandiva/value_validity_pair.h b/cpp/src/gandiva/value_validity_pair.h index 1bcd5d6a4bfd2..0de525d97040f 100644 --- a/cpp/src/gandiva/value_validity_pair.h +++ b/cpp/src/gandiva/value_validity_pair.h @@ -21,11 +21,12 @@ #include #include "gandiva/gandiva_aliases.h" +#include "gandiva/visibility.h" namespace gandiva { /// Pair of vector/validities generated after decomposing an expression tree/subtree. -class ValueValidityPair { +class GANDIVA_EXPORT ValueValidityPair { public: ValueValidityPair(const DexVector& validity_exprs, DexPtr value_expr) : validity_exprs_(validity_exprs), value_expr_(value_expr) {} diff --git a/cpp/src/gandiva/visibility.h b/cpp/src/gandiva/visibility.h new file mode 100644 index 0000000000000..450b3056b2ec0 --- /dev/null +++ b/cpp/src/gandiva/visibility.h @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#if defined(_WIN32) || defined(__CYGWIN__) +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4251) +#else +#pragma GCC diagnostic ignored "-Wattributes" +#endif + +#ifdef GANDIVA_STATIC +#define GANDIVA_EXPORT +#elif defined(GANDIVA_EXPORTING) +#define GANDIVA_EXPORT __declspec(dllexport) +#else +#define GANDIVA_EXPORT __declspec(dllimport) +#endif + +#define GANDIVA_NO_EXPORT +#else // Not Windows +#ifndef GANDIVA_EXPORT +#define GANDIVA_EXPORT __attribute__((visibility("default"))) +#endif +#ifndef GANDIVA_NO_EXPORT +#define GANDIVA_NO_EXPORT __attribute__((visibility("hidden"))) +#endif +#endif // Non-Windows + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt index 1538b58164b62..e3294bdee4dbb 100644 --- a/cpp/src/parquet/CMakeLists.txt +++ b/cpp/src/parquet/CMakeLists.txt @@ -15,6 +15,12 @@ # specific language governing permissions and limitations # under the License. +add_custom_target(parquet-all) +add_custom_target(parquet) +add_custom_target(parquet-benchmarks) +add_custom_target(parquet-tests) +add_dependencies(parquet-all parquet parquet-tests parquet-benchmarks) + file(READ "${CMAKE_CURRENT_SOURCE_DIR}/.parquetcppversion" PARQUET_VERSION) string(REPLACE "\n" "" PARQUET_VERSION "${PARQUET_VERSION}") string(REGEX MATCH "^([0-9]+\.[0-9]+\.[0-9]+(\.[0-9]+)?)" VERSION ${PARQUET_VERSION}) @@ -22,9 +28,6 @@ if(NOT VERSION) message(FATAL_ERROR "invalid .parquetcppversion") endif() -# For "make parquet" to build everything Parquet-related -add_custom_target(parquet) - function(ADD_PARQUET_TEST REL_TEST_NAME) set(options USE_STATIC_LINKING) set(one_value_args) @@ -34,20 +37,39 @@ function(ADD_PARQUET_TEST REL_TEST_NAME) message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}") endif() + set(TEST_ARGUMENTS + PREFIX "parquet" + LABELS "parquet-tests") + # By default we prefer shared linking with libparquet, as it's faster # and uses less disk space, but in some cases we need to force static # linking (see rationale below). 
if (ARG_USE_STATIC_LINKING) - ADD_ARROW_TEST(${REL_TEST_NAME} + ADD_TEST_CASE(${REL_TEST_NAME} STATIC_LINK_LIBS ${PARQUET_STATIC_TEST_LINK_LIBS} - PREFIX "parquet" - LABELS "unittest;parquet") + ${TEST_ARGUMENTS}) else() - ADD_ARROW_TEST(${REL_TEST_NAME} + ADD_TEST_CASE(${REL_TEST_NAME} STATIC_LINK_LIBS ${PARQUET_SHARED_TEST_LINK_LIBS} - PREFIX "parquet" - LABELS "unittest;parquet") + ${TEST_ARGUMENTS}) + endif() +endfunction() + +function(ADD_PARQUET_BENCHMARK REL_TEST_NAME) + set(options) + set(one_value_args PREFIX) + set(multi_value_args) + cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN}) + if (ARG_PREFIX) + set(PREFIX ${ARG_PREFIX}) + else() + set(PREFIX "parquet") endif() + ADD_BENCHMARK(${REL_TEST_NAME} + PREFIX ${PREFIX} + LABELS "parquet-benchmarks" + ${PARQUET_BENCHMARK_LINK_OPTION} + ${ARG_UNPARSED_ARGUMENTS}) endfunction() # ---------------------------------------------------------------------- @@ -76,8 +98,8 @@ if(MSVC) endif() set(PARQUET_MIN_TEST_LIBS - gtest_main_static - gtest_static) + ${GTEST_MAIN_LIBRARY} + ${GTEST_LIBRARY}) if (APPLE) set(PARQUET_MIN_TEST_LIBS @@ -101,9 +123,15 @@ set(PARQUET_STATIC_TEST_LINK_LIBS ${ARROW_LIBRARIES_FOR_STATIC_TESTS} parquet_static) -set(PARQUET_BENCHMARK_LINK_LIBRARIES - arrow_benchmark_main - parquet_shared) +if (WIN32) + # The benchmarks depend on some static Thrift symbols + set(PARQUET_BENCHMARK_LINK_OPTION + STATIC_LINK_LIBS arrow_benchmark_main + parquet_static) +else() + set(PARQUET_BENCHMARK_LINK_OPTION + EXTRA_LINK_LIBS parquet_shared) +endif() ############################################################ # Generated Thrift sources @@ -144,6 +172,7 @@ set(PARQUET_SRCS column_reader.cc column_scanner.cc column_writer.cc + encoding.cc file_reader.cc file_writer.cc metadata.cc @@ -185,12 +214,19 @@ if (NOT PARQUET_MINIMAL_DEPENDENCY) # Although we don't link parquet_objlib against anything, we need it to depend # on these libs as we may generate their headers via ExternalProject_Add -set(PARQUET_DEPENDENCIES ${PARQUET_DEPENDENCIES} - ${PARQUET_SHARED_LINK_LIBS} - ${PARQUET_SHARED_PRIVATE_LINK_LIBS} - ${PARQUET_STATIC_LINK_LIBS}) +if (ARROW_BUILD_SHARED) + set(PARQUET_DEPENDENCIES ${PARQUET_DEPENDENCIES} + ${PARQUET_SHARED_LINK_LIBS} + ${PARQUET_SHARED_PRIVATE_LINK_LIBS}) endif() +if (ARROW_BUILD_STATIC) + set(PARQUET_DEPENDENCIES ${PARQUET_DEPENDENCIES} + ${PARQUET_STATIC_LINK_LIBS}) +endif() + +endif(NOT PARQUET_MINIMAL_DEPENDENCY) + if(NOT APPLE AND NOT MSVC) # Localize thirdparty symbols using a linker version script. This hides them # from the client application. 
The OS X linker does not support the @@ -217,6 +253,8 @@ ADD_ARROW_LIB(parquet STATIC_LINK_LIBS ${PARQUET_STATIC_LINK_LIBS} ) +add_dependencies(parquet ${PARQUET_LIBRARIES}) + # Thrift requires these definitions for some types that we use foreach(LIB_TARGET ${PARQUET_LIBRARIES}) target_compile_definitions(${LIB_TARGET} @@ -232,32 +270,18 @@ foreach(LIB_TARGET ${PARQUET_LIBRARIES}) endif() endforeach() -add_dependencies(parquet ${PARQUET_LIBRARIES}) +# We always build the Parquet static libraries (see PARQUET-1420) so we add the +# PARQUET_STATIC public compile definition if we are building the unit tests OR +# if we are building the static library +if (WIN32 AND (NOT NO_TESTS OR ARROW_BUILD_STATIC)) + target_compile_definitions(parquet_static PUBLIC PARQUET_STATIC) +endif() add_subdirectory(api) add_subdirectory(arrow) add_subdirectory(util) -# Headers: top level -install(FILES - bloom_filter.h - column_reader.h - column_page.h - column_scanner.h - column_writer.h - encoding.h - exception.h - file_reader.h - file_writer.h - hasher.h - metadata.h - murmur3.h - printer.h - properties.h - schema.h - statistics.h - types.h - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/parquet") +ARROW_INSTALL_ALL_HEADERS("parquet") configure_file(parquet_version.h.in "${CMAKE_CURRENT_BINARY_DIR}/parquet_version.h" @@ -268,13 +292,7 @@ install(FILES DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/parquet") # pkg-config support -configure_file(parquet.pc.in - "${CMAKE_CURRENT_BINARY_DIR}/parquet.pc" - @ONLY) - -install(FILES - "${CMAKE_CURRENT_BINARY_DIR}/parquet.pc" - DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig/") +ARROW_ADD_PKG_CONFIG("parquet") ADD_PARQUET_TEST(bloom_filter-test) ADD_PARQUET_TEST(column_reader-test) @@ -294,9 +312,10 @@ ADD_PARQUET_TEST(reader-test) ADD_PARQUET_TEST(file-deserialize-test USE_STATIC_LINKING) ADD_PARQUET_TEST(schema-test USE_STATIC_LINKING) -ADD_ARROW_BENCHMARK(column-io-benchmark - PREFIX "parquet" - EXTRA_LINK_LIBS ${PARQUET_BENCHMARK_LINK_LIBRARIES}) -ADD_ARROW_BENCHMARK(encoding-benchmark - PREFIX "parquet" - EXTRA_LINK_LIBS ${PARQUET_BENCHMARK_LINK_LIBRARIES}) +ADD_PARQUET_BENCHMARK(column-io-benchmark) +ADD_PARQUET_BENCHMARK(encoding-benchmark) + +# Required for tests, the ExternalProject for zstd does not build on CMake < 3.7 +if (ARROW_WITH_ZSTD) + add_definitions(-DARROW_WITH_ZSTD) +endif() diff --git a/cpp/src/parquet/api/CMakeLists.txt b/cpp/src/parquet/api/CMakeLists.txt index 79fc716952a16..48fddb9d61ddf 100644 --- a/cpp/src/parquet/api/CMakeLists.txt +++ b/cpp/src/parquet/api/CMakeLists.txt @@ -16,9 +16,4 @@ # under the License. 
# Headers: public api -install(FILES - io.h - reader.h - writer.h - schema.h - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/parquet/api") +ARROW_INSTALL_ALL_HEADERS("parquet/api") diff --git a/cpp/src/parquet/arrow/CMakeLists.txt b/cpp/src/parquet/arrow/CMakeLists.txt index 429dadcd37e5e..ba9e93df7b87a 100644 --- a/cpp/src/parquet/arrow/CMakeLists.txt +++ b/cpp/src/parquet/arrow/CMakeLists.txt @@ -18,13 +18,7 @@ ADD_PARQUET_TEST(arrow-schema-test) ADD_PARQUET_TEST(arrow-reader-writer-test) -ADD_ARROW_BENCHMARK(reader-writer-benchmark - PREFIX "parquet-arrow" - EXTRA_LINK_LIBS ${PARQUET_BENCHMARK_LINK_LIBRARIES}) +ADD_PARQUET_BENCHMARK(reader-writer-benchmark + PREFIX "parquet-arrow") -# Headers: top level -install(FILES - reader.h - schema.h - writer.h - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/parquet/arrow") +ARROW_INSTALL_ALL_HEADERS("parquet/arrow") diff --git a/cpp/src/parquet/arrow/arrow-reader-writer-test.cc b/cpp/src/parquet/arrow/arrow-reader-writer-test.cc index 8aedd388d2341..bb9763224f3ba 100644 --- a/cpp/src/parquet/arrow/arrow-reader-writer-test.cc +++ b/cpp/src/parquet/arrow/arrow-reader-writer-test.cc @@ -29,6 +29,11 @@ #include #include +#include "arrow/api.h" +#include "arrow/test-util.h" +#include "arrow/type_traits.h" +#include "arrow/util/decimal.h" + #include "parquet/api/reader.h" #include "parquet/api/writer.h" @@ -36,16 +41,9 @@ #include "parquet/arrow/schema.h" #include "parquet/arrow/test-util.h" #include "parquet/arrow/writer.h" - #include "parquet/file_writer.h" - #include "parquet/util/test-common.h" -#include "arrow/api.h" -#include "arrow/test-util.h" -#include "arrow/type_traits.h" -#include "arrow/util/decimal.h" - using arrow::Array; using arrow::ArrayVisitor; using arrow::Buffer; @@ -466,7 +464,11 @@ class TestParquetIO : public ::testing::Test { ASSERT_OK_NO_THROW(file_reader->GetColumn(0, &column_reader)); ASSERT_NE(nullptr, column_reader.get()); - ASSERT_OK(column_reader->NextBatch(SMALL_SIZE, out)); + std::shared_ptr chunked_out; + ASSERT_OK(column_reader->NextBatch(SMALL_SIZE, &chunked_out)); + + ASSERT_EQ(1, chunked_out->num_chunks()); + *out = chunked_out->chunk(0); ASSERT_NE(nullptr, out->get()); } @@ -1191,65 +1193,116 @@ void MakeDateTimeTypesTable(std::shared_ptr
* out, bool nanos_as_micros = auto f0 = field("f0", ::arrow::date32()); auto f1 = field("f1", ::arrow::timestamp(TimeUnit::MILLI)); auto f2 = field("f2", ::arrow::timestamp(TimeUnit::MICRO)); - std::shared_ptr<::arrow::Field> f3; - if (nanos_as_micros) { - f3 = field("f3", ::arrow::timestamp(TimeUnit::MICRO)); - } else { - f3 = field("f3", ::arrow::timestamp(TimeUnit::NANO)); - } + auto f3_unit = nanos_as_micros ? TimeUnit::MICRO : TimeUnit::NANO; + auto f3 = field("f3", ::arrow::timestamp(f3_unit)); auto f4 = field("f4", ::arrow::time32(TimeUnit::MILLI)); auto f5 = field("f5", ::arrow::time64(TimeUnit::MICRO)); + std::shared_ptr<::arrow::Schema> schema(new ::arrow::Schema({f0, f1, f2, f3, f4, f5})); std::vector t32_values = {1489269000, 1489270000, 1489271000, 1489272000, 1489272000, 1489273000}; - std::vector t64_values = {1489269000000, 1489270000000, 1489271000000, - 1489272000000, 1489272000000, 1489273000000}; + std::vector t64_ns_values = {1489269000000, 1489270000000, 1489271000000, + 1489272000000, 1489272000000, 1489273000000}; std::vector t64_us_values = {1489269000, 1489270000, 1489271000, 1489272000, 1489272000, 1489273000}; + std::vector t64_ms_values = {1489269, 1489270, 1489271, + 1489272, 1489272, 1489273}; std::shared_ptr a0, a1, a2, a3, a4, a5; ArrayFromVector<::arrow::Date32Type, int32_t>(f0->type(), is_valid, t32_values, &a0); - ArrayFromVector<::arrow::TimestampType, int64_t>(f1->type(), is_valid, t64_values, &a1); - ArrayFromVector<::arrow::TimestampType, int64_t>(f2->type(), is_valid, t64_values, &a2); - if (nanos_as_micros) { - ArrayFromVector<::arrow::TimestampType, int64_t>(f3->type(), is_valid, t64_us_values, - &a3); - } else { - ArrayFromVector<::arrow::TimestampType, int64_t>(f3->type(), is_valid, t64_values, - &a3); - } + ArrayFromVector<::arrow::TimestampType, int64_t>(f1->type(), is_valid, t64_ms_values, + &a1); + ArrayFromVector<::arrow::TimestampType, int64_t>(f2->type(), is_valid, t64_us_values, + &a2); + auto f3_data = nanos_as_micros ? t64_us_values : t64_ns_values; + ArrayFromVector<::arrow::TimestampType, int64_t>(f3->type(), is_valid, f3_data, &a3); ArrayFromVector<::arrow::Time32Type, int32_t>(f4->type(), is_valid, t32_values, &a4); - ArrayFromVector<::arrow::Time64Type, int64_t>(f5->type(), is_valid, t64_values, &a5); + ArrayFromVector<::arrow::Time64Type, int64_t>(f5->type(), is_valid, t64_us_values, &a5); std::vector> columns = { std::make_shared("f0", a0), std::make_shared("f1", a1), std::make_shared("f2", a2), std::make_shared("f3", a3), std::make_shared("f4", a4), std::make_shared("f5", a5)}; + *out = Table::Make(schema, columns); } TEST(TestArrowReadWrite, DateTimeTypes) { - std::shared_ptr
<Table> table;
+  std::shared_ptr<Table>
table, result; MakeDateTimeTypesTable(&table); - // Use deprecated INT96 type - std::shared_ptr
result; - ASSERT_NO_FATAL_FAILURE(DoSimpleRoundtrip( - table, false /* use_threads */, table->num_rows(), {}, &result, - ArrowWriterProperties::Builder().enable_deprecated_int96_timestamps()->build())); - - ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*table, *result)); - // Cast nanaoseconds to microseconds and use INT64 physical type ASSERT_NO_FATAL_FAILURE( DoSimpleRoundtrip(table, false /* use_threads */, table->num_rows(), {}, &result)); - std::shared_ptr
expected; MakeDateTimeTypesTable(&table, true); ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*table, *result)); } +TEST(TestArrowReadWrite, UseDeprecatedInt96) { + using ::arrow::ArrayFromVector; + using ::arrow::field; + using ::arrow::schema; + + std::vector is_valid = {true, true, true, false, true, true}; + + auto t_s = ::arrow::timestamp(TimeUnit::SECOND); + auto t_ms = ::arrow::timestamp(TimeUnit::MILLI); + auto t_us = ::arrow::timestamp(TimeUnit::MICRO); + auto t_ns = ::arrow::timestamp(TimeUnit::NANO); + + std::vector s_values = {1489269, 1489270, 1489271, 1489272, 1489272, 1489273}; + std::vector ms_values = {1489269000, 1489270000, 1489271000, + 1489272001, 1489272000, 1489273000}; + std::vector us_values = {1489269000000, 1489270000000, 1489271000000, + 1489272000001, 1489272000000, 1489273000000}; + std::vector ns_values = {1489269000000000LL, 1489270000000000LL, + 1489271000000000LL, 1489272000000001LL, + 1489272000000000LL, 1489273000000000LL}; + + std::shared_ptr a_s, a_ms, a_us, a_ns; + ArrayFromVector<::arrow::TimestampType, int64_t>(t_s, is_valid, s_values, &a_s); + ArrayFromVector<::arrow::TimestampType, int64_t>(t_ms, is_valid, ms_values, &a_ms); + ArrayFromVector<::arrow::TimestampType, int64_t>(t_us, is_valid, us_values, &a_us); + ArrayFromVector<::arrow::TimestampType, int64_t>(t_ns, is_valid, ns_values, &a_ns); + + // Each input is typed with a unique TimeUnit + auto input_schema = schema( + {field("f_s", t_s), field("f_ms", t_ms), field("f_us", t_us), field("f_ns", t_ns)}); + auto input = Table::Make( + input_schema, + {std::make_shared("f_s", a_s), std::make_shared("f_ms", a_ms), + std::make_shared("f_us", a_us), std::make_shared("f_ns", a_ns)}); + + // When reading parquet files, all int96 schema fields are converted to + // timestamp nanoseconds + auto ex_schema = schema({field("f_s", t_ns), field("f_ms", t_ns), field("f_us", t_ns), + field("f_ns", t_ns)}); + auto ex_result = Table::Make( + ex_schema, + {std::make_shared("f_s", a_ns), std::make_shared("f_ms", a_ns), + std::make_shared("f_us", a_ns), std::make_shared("f_ns", a_ns)}); + + std::shared_ptr
result; + ASSERT_NO_FATAL_FAILURE(DoSimpleRoundtrip( + input, false /* use_threads */, input->num_rows(), {}, &result, + ArrowWriterProperties::Builder().enable_deprecated_int96_timestamps()->build())); + + ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*ex_result, *result)); + + // Ensure enable_deprecated_int96_timestamps as precedence over + // coerce_timestamps. + ASSERT_NO_FATAL_FAILURE(DoSimpleRoundtrip(input, false /* use_threads */, + input->num_rows(), {}, &result, + ArrowWriterProperties::Builder() + .enable_deprecated_int96_timestamps() + ->coerce_timestamps(TimeUnit::MILLI) + ->build())); + + ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*ex_result, *result)); +} + TEST(TestArrowReadWrite, CoerceTimestamps) { using ::arrow::ArrayFromVector; using ::arrow::field; @@ -1295,6 +1348,12 @@ TEST(TestArrowReadWrite, CoerceTimestamps) { {std::make_shared("f_s", a_ms), std::make_shared("f_ms", a_ms), std::make_shared("f_us", a_ms), std::make_shared("f_ns", a_ms)}); + std::shared_ptr
milli_result; + ASSERT_NO_FATAL_FAILURE(DoSimpleRoundtrip( + input, false /* use_threads */, input->num_rows(), {}, &milli_result, + ArrowWriterProperties::Builder().coerce_timestamps(TimeUnit::MILLI)->build())); + ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*ex_milli_result, *milli_result)); + // Result when coercing to microseconds auto s3 = std::shared_ptr<::arrow::Schema>( new ::arrow::Schema({field("f_s", t_us), field("f_ms", t_us), field("f_us", t_us), @@ -1304,13 +1363,6 @@ TEST(TestArrowReadWrite, CoerceTimestamps) { {std::make_shared("f_s", a_us), std::make_shared("f_ms", a_us), std::make_shared("f_us", a_us), std::make_shared("f_ns", a_us)}); - std::shared_ptr
milli_result; - ASSERT_NO_FATAL_FAILURE(DoSimpleRoundtrip( - input, false /* use_threads */, input->num_rows(), {}, &milli_result, - ArrowWriterProperties::Builder().coerce_timestamps(TimeUnit::MILLI)->build())); - - ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*ex_milli_result, *milli_result)); - std::shared_ptr
micro_result; ASSERT_NO_FATAL_FAILURE(DoSimpleRoundtrip( input, false /* use_threads */, input->num_rows(), {}, µ_result, @@ -1455,65 +1507,6 @@ TEST(TestArrowReadWrite, ConvertedDateTimeTypes) { ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*ex_table, *result)); } -// Regression for ARROW-2802 -TEST(TestArrowReadWrite, CoerceTimestampsAndSupportDeprecatedInt96) { - using ::arrow::Column; - using ::arrow::default_memory_pool; - using ::arrow::Field; - using ::arrow::Schema; - using ::arrow::Table; - using ::arrow::TimestampBuilder; - using ::arrow::TimestampType; - using ::arrow::TimeUnit; - - auto timestamp_type = std::make_shared(TimeUnit::NANO); - - TimestampBuilder builder(timestamp_type, default_memory_pool()); - for (std::int64_t ii = 0; ii < 10; ++ii) { - ASSERT_OK(builder.Append(1000000000L * ii)); - } - std::shared_ptr values; - ASSERT_OK(builder.Finish(&values)); - - std::vector> fields; - auto field = std::make_shared("nanos", timestamp_type); - fields.emplace_back(field); - - auto schema = std::make_shared(fields); - - std::vector> columns; - auto column = std::make_shared("nanos", values); - columns.emplace_back(column); - - auto table = Table::Make(schema, columns); - - auto arrow_writer_properties = ArrowWriterProperties::Builder() - .coerce_timestamps(TimeUnit::MICRO) - ->enable_deprecated_int96_timestamps() - ->build(); - - std::shared_ptr
result; - DoSimpleRoundtrip(table, false /* use_threads */, table->num_rows(), {}, &result, - arrow_writer_properties); - - ASSERT_EQ(table->num_columns(), result->num_columns()); - ASSERT_EQ(table->num_rows(), result->num_rows()); - - auto actual_column = result->column(0); - auto data = actual_column->data(); - auto expected_values = - static_cast<::arrow::NumericArray*>(values.get())->raw_values(); - for (int ii = 0; ii < data->num_chunks(); ++ii) { - auto chunk = - static_cast<::arrow::NumericArray*>(data->chunk(ii).get()); - auto values = chunk->raw_values(); - for (int64_t jj = 0; jj < chunk->length(); ++jj, ++expected_values) { - // Check that the nanos have been converted to micros - ASSERT_EQ(*expected_values / 1000, values[jj]); - } - } -} - void MakeDoubleTable(int num_columns, int num_rows, int nchunks, std::shared_ptr
* out) { std::shared_ptr<::arrow::Column> column; @@ -1712,6 +1705,7 @@ TEST(TestArrowReadWrite, ReadColumnSubset) { TEST(TestArrowReadWrite, ListLargeRecords) { // PARQUET-1308: This test passed on Linux when num_rows was smaller const int num_rows = 2000; + const int row_group_size = 100; std::shared_ptr list_array; std::shared_ptr<::DataType> list_type; @@ -1723,8 +1717,8 @@ TEST(TestArrowReadWrite, ListLargeRecords) { std::shared_ptr
table = Table::Make(schema, {list_array}); std::shared_ptr buffer; - ASSERT_NO_FATAL_FAILURE( - WriteTableToBuffer(table, 100, default_arrow_writer_properties(), &buffer)); + ASSERT_NO_FATAL_FAILURE(WriteTableToBuffer(table, row_group_size, + default_arrow_writer_properties(), &buffer)); std::unique_ptr reader; ASSERT_OK_NO_THROW(OpenFile(std::make_shared(buffer), @@ -1736,7 +1730,7 @@ TEST(TestArrowReadWrite, ListLargeRecords) { ASSERT_OK_NO_THROW(reader->ReadTable(&result)); ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*table, *result)); - // Read chunked + // Read 1 record at a time ASSERT_OK_NO_THROW(OpenFile(std::make_shared(buffer), ::arrow::default_memory_pool(), ::parquet::default_reader_properties(), nullptr, &reader)); @@ -1746,10 +1740,11 @@ TEST(TestArrowReadWrite, ListLargeRecords) { std::vector> pieces; for (int i = 0; i < num_rows; ++i) { - std::shared_ptr piece; - ASSERT_OK(col_reader->NextBatch(1, &piece)); - ASSERT_EQ(1, piece->length()); - pieces.push_back(piece); + std::shared_ptr chunked_piece; + ASSERT_OK(col_reader->NextBatch(1, &chunked_piece)); + ASSERT_EQ(1, chunked_piece->length()); + ASSERT_EQ(1, chunked_piece->num_chunks()); + pieces.push_back(chunked_piece->chunk(0)); } auto chunked = std::make_shared<::arrow::ChunkedArray>(pieces); @@ -2263,7 +2258,7 @@ TEST_P(TestNestedSchemaRead, DeepNestedSchemaRead) { const int num_trees = 3; const int depth = 3; #else - const int num_trees = 10; + const int num_trees = 5; const int depth = 5; #endif const int num_children = 3; @@ -2285,30 +2280,45 @@ TEST_P(TestNestedSchemaRead, DeepNestedSchemaRead) { INSTANTIATE_TEST_CASE_P(Repetition_type, TestNestedSchemaRead, ::testing::Values(Repetition::REQUIRED, Repetition::OPTIONAL)); -TEST(TestImpalaConversion, NanosecondToImpala) { +TEST(TestImpalaConversion, ArrowTimestampToImpalaTimestamp) { // June 20, 2017 16:32:56 and 123456789 nanoseconds int64_t nanoseconds = INT64_C(1497976376123456789); - Int96 expected = {{UINT32_C(632093973), UINT32_C(13871), UINT32_C(2457925)}}; + Int96 calculated; + + Int96 expected = {{UINT32_C(632093973), UINT32_C(13871), UINT32_C(2457925)}}; internal::NanosecondsToImpalaTimestamp(nanoseconds, &calculated); ASSERT_EQ(expected, calculated); } -TEST(TestArrowReaderAdHoc, Int96BadMemoryAccess) { - // PARQUET-995 +void TryReadDataFile(const std::string& testing_file_path, bool should_succeed = true) { std::string dir_string(test::get_data_dir()); std::stringstream ss; - ss << dir_string << "/" - << "alltypes_plain.parquet"; + ss << dir_string << "/" << testing_file_path; auto path = ss.str(); auto pool = ::arrow::default_memory_pool(); std::unique_ptr arrow_reader; - ASSERT_NO_THROW( - arrow_reader.reset(new FileReader(pool, ParquetFileReader::OpenFile(path, false)))); - std::shared_ptr<::arrow::Table> table; - ASSERT_OK_NO_THROW(arrow_reader->ReadTable(&table)); + try { + arrow_reader.reset(new FileReader(pool, ParquetFileReader::OpenFile(path, false))); + std::shared_ptr<::arrow::Table> table; + ASSERT_OK(arrow_reader->ReadTable(&table)); + } catch (const ParquetException& e) { + if (should_succeed) { + FAIL() << "Exception thrown when reading file: " << e.what(); + } + } +} + +TEST(TestArrowReaderAdHoc, Int96BadMemoryAccess) { + // PARQUET-995 + TryReadDataFile("alltypes_plain.parquet"); +} + +TEST(TestArrowReaderAdHoc, CorruptedSchema) { + // PARQUET-1481 + TryReadDataFile("bad_data/PARQUET-1481.parquet", false /* should_succeed */); } class TestArrowReaderAdHocSparkAndHvr diff --git a/cpp/src/parquet/arrow/arrow-schema-test.cc 
b/cpp/src/parquet/arrow/arrow-schema-test.cc index cb2b8508e66a5..73de8b1c456c9 100644 --- a/cpp/src/parquet/arrow/arrow-schema-test.cc +++ b/cpp/src/parquet/arrow/arrow-schema-test.cc @@ -21,6 +21,7 @@ #include "gtest/gtest.h" #include "parquet/arrow/schema.h" +#include "parquet/schema.h" #include "arrow/api.h" #include "arrow/test-util.h" diff --git a/cpp/src/parquet/arrow/reader-writer-benchmark.cc b/cpp/src/parquet/arrow/reader-writer-benchmark.cc index 775c1028bb43f..1889006573b6b 100644 --- a/cpp/src/parquet/arrow/reader-writer-benchmark.cc +++ b/cpp/src/parquet/arrow/reader-writer-benchmark.cc @@ -142,7 +142,8 @@ std::shared_ptr<::arrow::Table> TableFromVector(const std::vector static void BM_WriteColumn(::benchmark::State& state) { - std::vector values(BENCHMARK_SIZE, 128); + using T = typename ParquetType::c_type; + std::vector values(BENCHMARK_SIZE, static_cast(128)); std::shared_ptr<::arrow::Table> table = TableFromVector(values, nullable); while (state.KeepRunning()) { @@ -167,7 +168,9 @@ BENCHMARK_TEMPLATE2(BM_WriteColumn, true, BooleanType); template static void BM_ReadColumn(::benchmark::State& state) { - std::vector values(BENCHMARK_SIZE, 128); + using T = typename ParquetType::c_type; + + std::vector values(BENCHMARK_SIZE, static_cast(128)); std::shared_ptr<::arrow::Table> table = TableFromVector(values, nullable); auto output = std::make_shared(); EXIT_NOT_OK(WriteTable(*table, ::arrow::default_memory_pool(), output, BENCHMARK_SIZE)); diff --git a/cpp/src/parquet/arrow/reader.cc b/cpp/src/parquet/arrow/reader.cc index 6273fda464025..0b60c66f9a2bc 100644 --- a/cpp/src/parquet/arrow/reader.cc +++ b/cpp/src/parquet/arrow/reader.cc @@ -21,17 +21,26 @@ #include #include #include -#include -#include #include #include #include -#include "arrow/api.h" +#include "arrow/array.h" +#include "arrow/buffer.h" +#include "arrow/builder.h" +#include "arrow/record_batch.h" +#include "arrow/status.h" +#include "arrow/table.h" +#include "arrow/type.h" +#include "arrow/type_traits.h" #include "arrow/util/bit-util.h" +#include "arrow/util/int-util.h" #include "arrow/util/logging.h" #include "arrow/util/thread-pool.h" +// For arrow::compute::Datum. This should perhaps be promoted. 
See ARROW-4022 +#include "arrow/compute/kernel.h" + #include "parquet/arrow/record_reader.h" #include "parquet/arrow/schema.h" #include "parquet/column_reader.h" @@ -46,6 +55,7 @@ using arrow::Array; using arrow::BooleanArray; +using arrow::ChunkedArray; using arrow::Column; using arrow::Field; using arrow::Int32Array; @@ -57,6 +67,9 @@ using arrow::StructArray; using arrow::Table; using arrow::TimestampArray; +// For Array/ChunkedArray variant +using arrow::compute::Datum; + using parquet::schema::Node; // Help reduce verbosity @@ -69,21 +82,24 @@ namespace parquet { namespace arrow { using ::arrow::BitUtil::BytesForBits; +using ::arrow::BitUtil::FromBigEndian; +using ::arrow::internal::SafeLeftShift; -constexpr int64_t kJulianToUnixEpochDays = 2440588LL; -constexpr int64_t kMillisecondsInADay = 86400000LL; -constexpr int64_t kNanosecondsInADay = kMillisecondsInADay * 1000LL * 1000LL; +template +using ArrayType = typename ::arrow::TypeTraits::ArrayType; -static inline int64_t impala_timestamp_to_nanoseconds(const Int96& impala_timestamp) { - int64_t days_since_epoch = impala_timestamp.value[2] - kJulianToUnixEpochDays; - int64_t nanoseconds = 0; +namespace { - memcpy(&nanoseconds, &impala_timestamp.value, sizeof(int64_t)); - return days_since_epoch * kNanosecondsInADay + nanoseconds; +Status GetSingleChunk(const ChunkedArray& chunked, std::shared_ptr* out) { + DCHECK_GT(chunked.num_chunks(), 0); + if (chunked.num_chunks() > 1) { + return Status::Invalid("Function call returned a chunked array"); + } + *out = chunked.chunk(0); + return Status::OK(); } -template -using ArrayType = typename ::arrow::TypeTraits::ArrayType; +} // namespace // ---------------------------------------------------------------------- // Iteration utilities @@ -223,15 +239,18 @@ class FileReader::Impl { virtual ~Impl() {} Status GetColumn(int i, std::unique_ptr* out); - Status ReadSchemaField(int i, std::shared_ptr* out); + + Status ReadSchemaField(int i, std::shared_ptr* out); Status ReadSchemaField(int i, const std::vector& indices, - std::shared_ptr* out); + std::shared_ptr* out); + Status ReadColumn(int i, std::shared_ptr* out); + Status ReadColumnChunk(int column_index, int row_group_index, + std::shared_ptr* out); + Status GetReaderForNode(int index, const Node* node, const std::vector& indices, int16_t def_level, std::unique_ptr* out); - Status ReadColumn(int i, std::shared_ptr* out); - Status ReadColumnChunk(int column_index, int row_group_index, - std::shared_ptr* out); + Status GetSchema(std::shared_ptr<::arrow::Schema>* out); Status GetSchema(const std::vector& indices, std::shared_ptr<::arrow::Schema>* out); @@ -267,7 +286,8 @@ class FileReader::Impl { class ColumnReader::ColumnReaderImpl { public: virtual ~ColumnReaderImpl() {} - virtual Status NextBatch(int64_t records_to_read, std::shared_ptr* out) = 0; + virtual Status NextBatch(int64_t records_to_read, + std::shared_ptr* out) = 0; virtual Status GetDefLevels(const int16_t** data, size_t* length) = 0; virtual Status GetRepLevels(const int16_t** data, size_t* length) = 0; virtual const std::shared_ptr field() = 0; @@ -283,10 +303,10 @@ class PARQUET_NO_EXPORT PrimitiveImpl : public ColumnReader::ColumnReaderImpl { NextRowGroup(); } - Status NextBatch(int64_t records_to_read, std::shared_ptr* out) override; + Status NextBatch(int64_t records_to_read, std::shared_ptr* out) override; template - Status WrapIntoListArray(std::shared_ptr* array); + Status WrapIntoListArray(Datum* inout_array); Status GetDefLevels(const int16_t** data, size_t* length) 
override; Status GetRepLevels(const int16_t** data, size_t* length) override; @@ -314,7 +334,7 @@ class PARQUET_NO_EXPORT StructImpl : public ColumnReader::ColumnReaderImpl { InitField(node, children); } - Status NextBatch(int64_t records_to_read, std::shared_ptr* out) override; + Status NextBatch(int64_t records_to_read, std::shared_ptr* out) override; Status GetDefLevels(const int16_t** data, size_t* length) override; Status GetRepLevels(const int16_t** data, size_t* length) override; const std::shared_ptr field() override { return field_; } @@ -395,7 +415,7 @@ Status FileReader::Impl::GetReaderForNode( return Status::OK(); } -Status FileReader::Impl::ReadSchemaField(int i, std::shared_ptr* out) { +Status FileReader::Impl::ReadSchemaField(int i, std::shared_ptr* out) { std::vector indices(reader_->metadata()->num_columns()); for (size_t j = 0; j < indices.size(); ++j) { @@ -406,7 +426,7 @@ Status FileReader::Impl::ReadSchemaField(int i, std::shared_ptr* out) { } Status FileReader::Impl::ReadSchemaField(int i, const std::vector& indices, - std::shared_ptr* out) { + std::shared_ptr* out) { auto parquet_schema = reader_->metadata()->schema(); auto node = parquet_schema->group_node()->field(i).get(); @@ -432,7 +452,7 @@ Status FileReader::Impl::ReadSchemaField(int i, const std::vector& indices, return reader->NextBatch(records_to_read, out); } -Status FileReader::Impl::ReadColumn(int i, std::shared_ptr* out) { +Status FileReader::Impl::ReadColumn(int i, std::shared_ptr* out) { std::unique_ptr flat_column_reader; RETURN_NOT_OK(GetColumn(i, &flat_column_reader)); @@ -452,7 +472,7 @@ Status FileReader::Impl::GetSchema(const std::vector& indices, } Status FileReader::Impl::ReadColumnChunk(int column_index, int row_group_index, - std::shared_ptr* out) { + std::shared_ptr* out) { auto rg_metadata = reader_->metadata()->RowGroup(row_group_index); int64_t records_to_read = rg_metadata->ColumnChunk(column_index)->num_values(); @@ -463,10 +483,7 @@ Status FileReader::Impl::ReadColumnChunk(int column_index, int row_group_index, new PrimitiveImpl(pool_, std::move(input))); ColumnReader flat_column_reader(std::move(impl)); - std::shared_ptr array; - RETURN_NOT_OK(flat_column_reader.NextBatch(records_to_read, &array)); - *out = array; - return Status::OK(); + return flat_column_reader.NextBatch(records_to_read, out); } Status FileReader::Impl::ReadRowGroup(int row_group_index, @@ -485,7 +502,7 @@ Status FileReader::Impl::ReadRowGroup(int row_group_index, auto ReadColumnFunc = [&indices, &row_group_index, &schema, &columns, this](int i) { int column_index = indices[i]; - std::shared_ptr array; + std::shared_ptr array; RETURN_NOT_OK(ReadColumnChunk(column_index, row_group_index, &array)); columns[i] = std::make_shared(schema->field(i), array); return Status::OK(); @@ -532,7 +549,7 @@ Status FileReader::Impl::ReadTable(const std::vector& indices, std::vector> columns(num_fields); auto ReadColumnFunc = [&indices, &field_indices, &schema, &columns, this](int i) { - std::shared_ptr array; + std::shared_ptr array; RETURN_NOT_OK(ReadSchemaField(field_indices[i], indices, &array)); columns[i] = std::make_shared(schema->field(i), array); return Status::OK(); @@ -576,8 +593,6 @@ Status FileReader::Impl::ReadTable(std::shared_ptr
<Table>* table) {
 Status FileReader::Impl::ReadRowGroups(const std::vector<int>& row_groups,
                                        const std::vector<int>& indices,
                                        std::shared_ptr<Table>
* table) { - // TODO(PARQUET-1393): Modify the record readers to already read this into a single, - // continuous array. std::vector> tables(row_groups.size(), nullptr); for (size_t i = 0; i < row_groups.size(); ++i) { @@ -633,7 +648,7 @@ Status FileReader::GetSchema(const std::vector& indices, return impl_->GetSchema(indices, out); } -Status FileReader::ReadColumn(int i, std::shared_ptr* out) { +Status FileReader::ReadColumn(int i, std::shared_ptr* out) { try { return impl_->ReadColumn(i, out); } catch (const ::parquet::ParquetException& e) { @@ -641,7 +656,7 @@ Status FileReader::ReadColumn(int i, std::shared_ptr* out) { } } -Status FileReader::ReadSchemaField(int i, std::shared_ptr* out) { +Status FileReader::ReadSchemaField(int i, std::shared_ptr* out) { try { return impl_->ReadSchemaField(i, out); } catch (const ::parquet::ParquetException& e) { @@ -649,6 +664,18 @@ Status FileReader::ReadSchemaField(int i, std::shared_ptr* out) { } } +Status FileReader::ReadColumn(int i, std::shared_ptr* out) { + std::shared_ptr chunked_out; + RETURN_NOT_OK(ReadColumn(i, &chunked_out)); + return GetSingleChunk(*chunked_out, out); +} + +Status FileReader::ReadSchemaField(int i, std::shared_ptr* out) { + std::shared_ptr chunked_out; + RETURN_NOT_OK(ReadSchemaField(i, &chunked_out)); + return GetSingleChunk(*chunked_out, out); +} + Status FileReader::GetRecordBatchReader(const std::vector& row_group_indices, std::shared_ptr* out) { std::vector indices(impl_->num_columns()); @@ -671,10 +698,8 @@ Status FileReader::GetRecordBatchReader(const std::vector& row_group_indice int max_num = num_row_groups(); for (auto row_group_index : row_group_indices) { if (row_group_index < 0 || row_group_index >= max_num) { - std::ostringstream ss; - ss << "Some index in row_group_indices is " << row_group_index - << ", which is either < 0 or >= num_row_groups(" << max_num << ")"; - return Status::Invalid(ss.str()); + return Status::Invalid("Some index in row_group_indices is ", row_group_index, + ", which is either < 0 or >= num_row_groups(", max_num, ")"); } } @@ -764,7 +789,28 @@ const ParquetFileReader* FileReader::parquet_reader() const { } template -Status PrimitiveImpl::WrapIntoListArray(std::shared_ptr* array) { +Status PrimitiveImpl::WrapIntoListArray(Datum* inout_array) { + if (descr_->max_repetition_level() == 0) { + // Flat, no action + return Status::OK(); + } + + std::shared_ptr flat_array; + + // ARROW-3762(wesm): If inout_array is a chunked array, we reject as this is + // not yet implemented + if (inout_array->kind() == Datum::CHUNKED_ARRAY) { + if (inout_array->chunked_array()->num_chunks() > 1) { + return Status::NotImplemented( + "Nested data conversions not implemented for " + "chunked array outputs"); + } + flat_array = inout_array->chunked_array()->chunk(0); + } else { + DCHECK_EQ(Datum::ARRAY, inout_array->kind()); + flat_array = inout_array->make_array(); + } + const int16_t* def_levels = record_reader_->def_levels(); const int16_t* rep_levels = record_reader_->rep_levels(); const int64_t total_levels_read = record_reader_->levels_position(); @@ -775,110 +821,106 @@ Status PrimitiveImpl::WrapIntoListArray(std::shared_ptr* array) { &arrow_schema)); std::shared_ptr current_field = arrow_schema->field(0); - if (descr_->max_repetition_level() > 0) { - // Walk downwards to extract nullability - std::vector nullable; - std::vector> offset_builders; - std::vector> valid_bits_builders; - nullable.push_back(current_field->nullable()); - while (current_field->type()->num_children() > 0) { - if 
(current_field->type()->num_children() > 1) { - return Status::NotImplemented( - "Fields with more than one child are not supported."); - } else { - if (current_field->type()->id() != ::arrow::Type::LIST) { - return Status::NotImplemented( - "Currently only nesting with Lists is supported."); - } - current_field = current_field->type()->child(0); + // Walk downwards to extract nullability + std::vector nullable; + std::vector> offset_builders; + std::vector> valid_bits_builders; + nullable.push_back(current_field->nullable()); + while (current_field->type()->num_children() > 0) { + if (current_field->type()->num_children() > 1) { + return Status::NotImplemented("Fields with more than one child are not supported."); + } else { + if (current_field->type()->id() != ::arrow::Type::LIST) { + return Status::NotImplemented("Currently only nesting with Lists is supported."); } - offset_builders.emplace_back( - std::make_shared<::arrow::Int32Builder>(::arrow::int32(), pool_)); - valid_bits_builders.emplace_back( - std::make_shared<::arrow::BooleanBuilder>(::arrow::boolean(), pool_)); - nullable.push_back(current_field->nullable()); + current_field = current_field->type()->child(0); } + offset_builders.emplace_back( + std::make_shared<::arrow::Int32Builder>(::arrow::int32(), pool_)); + valid_bits_builders.emplace_back( + std::make_shared<::arrow::BooleanBuilder>(::arrow::boolean(), pool_)); + nullable.push_back(current_field->nullable()); + } - int64_t list_depth = offset_builders.size(); - // This describes the minimal definition that describes a level that - // reflects a value in the primitive values array. - int16_t values_def_level = descr_->max_definition_level(); - if (nullable[nullable.size() - 1]) { - values_def_level--; - } + int64_t list_depth = offset_builders.size(); + // This describes the minimal definition that describes a level that + // reflects a value in the primitive values array. + int16_t values_def_level = descr_->max_definition_level(); + if (nullable[nullable.size() - 1]) { + values_def_level--; + } - // The definition levels that are needed so that a list is declared - // as empty and not null. - std::vector empty_def_level(list_depth); - int def_level = 0; - for (int i = 0; i < list_depth; i++) { - if (nullable[i]) { - def_level++; - } - empty_def_level[i] = static_cast(def_level); + // The definition levels that are needed so that a list is declared + // as empty and not null. 
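The nullability walk above is easier to follow against a concrete record-shredding example. The sketch below uses made-up data, not the patch's code; it assumes a single nullable list<item: int32 (nullable)> column, so max_definition_level is 3 and max_repetition_level is 1, and shows the levels that the loop following this note consumes:

#include <cstdint>

int main() {
  // Four logical records: null, [], [7], [8, null]
  //   def 0: list is null        def 1: list present but empty
  //   def 2: element is null     def 3: element is present
  // rep 1 means "still inside the current record".
  const int16_t def_levels[] = {0, 1, 3, 3, 2};
  const int16_t rep_levels[] = {0, 0, 0, 0, 1};
  // empty_def_level for the single list level is 1, so the test
  // (empty_def_level[j] - 1) == def_levels[i] in the loop below fires only
  // for the def_level 0 entry, appending a false validity bit (a null list).
  static_cast<void>(def_levels);
  static_cast<void>(rep_levels);
  return 0;
}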
+ std::vector empty_def_level(list_depth); + int def_level = 0; + for (int i = 0; i < list_depth; i++) { + if (nullable[i]) { def_level++; } + empty_def_level[i] = static_cast(def_level); + def_level++; + } - int32_t values_offset = 0; - std::vector null_counts(list_depth, 0); - for (int64_t i = 0; i < total_levels_read; i++) { - int16_t rep_level = rep_levels[i]; - if (rep_level < descr_->max_repetition_level()) { - for (int64_t j = rep_level; j < list_depth; j++) { - if (j == (list_depth - 1)) { - RETURN_NOT_OK(offset_builders[j]->Append(values_offset)); - } else { - RETURN_NOT_OK(offset_builders[j]->Append( - static_cast(offset_builders[j + 1]->length()))); - } + int32_t values_offset = 0; + std::vector null_counts(list_depth, 0); + for (int64_t i = 0; i < total_levels_read; i++) { + int16_t rep_level = rep_levels[i]; + if (rep_level < descr_->max_repetition_level()) { + for (int64_t j = rep_level; j < list_depth; j++) { + if (j == (list_depth - 1)) { + RETURN_NOT_OK(offset_builders[j]->Append(values_offset)); + } else { + RETURN_NOT_OK(offset_builders[j]->Append( + static_cast(offset_builders[j + 1]->length()))); + } - if (((empty_def_level[j] - 1) == def_levels[i]) && (nullable[j])) { - RETURN_NOT_OK(valid_bits_builders[j]->Append(false)); - null_counts[j]++; + if (((empty_def_level[j] - 1) == def_levels[i]) && (nullable[j])) { + RETURN_NOT_OK(valid_bits_builders[j]->Append(false)); + null_counts[j]++; + break; + } else { + RETURN_NOT_OK(valid_bits_builders[j]->Append(true)); + if (empty_def_level[j] == def_levels[i]) { break; - } else { - RETURN_NOT_OK(valid_bits_builders[j]->Append(true)); - if (empty_def_level[j] == def_levels[i]) { - break; - } } } } - if (def_levels[i] >= values_def_level) { - values_offset++; - } } - // Add the final offset to all lists - for (int64_t j = 0; j < list_depth; j++) { - if (j == (list_depth - 1)) { - RETURN_NOT_OK(offset_builders[j]->Append(values_offset)); - } else { - RETURN_NOT_OK(offset_builders[j]->Append( - static_cast(offset_builders[j + 1]->length()))); - } + if (def_levels[i] >= values_def_level) { + values_offset++; } - - std::vector> offsets; - std::vector> valid_bits; - std::vector list_lengths; - for (int64_t j = 0; j < list_depth; j++) { - list_lengths.push_back(offset_builders[j]->length() - 1); - std::shared_ptr array; - RETURN_NOT_OK(offset_builders[j]->Finish(&array)); - offsets.emplace_back(std::static_pointer_cast(array)->values()); - RETURN_NOT_OK(valid_bits_builders[j]->Finish(&array)); - valid_bits.emplace_back(std::static_pointer_cast(array)->values()); + } + // Add the final offset to all lists + for (int64_t j = 0; j < list_depth; j++) { + if (j == (list_depth - 1)) { + RETURN_NOT_OK(offset_builders[j]->Append(values_offset)); + } else { + RETURN_NOT_OK(offset_builders[j]->Append( + static_cast(offset_builders[j + 1]->length()))); } + } - std::shared_ptr output(*array); - for (int64_t j = list_depth - 1; j >= 0; j--) { - auto list_type = - ::arrow::list(::arrow::field("item", output->type(), nullable[j + 1])); - output = std::make_shared<::arrow::ListArray>( - list_type, list_lengths[j], offsets[j], output, valid_bits[j], null_counts[j]); - } - *array = output; + std::vector> offsets; + std::vector> valid_bits; + std::vector list_lengths; + for (int64_t j = 0; j < list_depth; j++) { + list_lengths.push_back(offset_builders[j]->length() - 1); + std::shared_ptr array; + RETURN_NOT_OK(offset_builders[j]->Finish(&array)); + offsets.emplace_back(std::static_pointer_cast(array)->values()); + 
RETURN_NOT_OK(valid_bits_builders[j]->Finish(&array)); + valid_bits.emplace_back(std::static_pointer_cast(array)->values()); + } + + std::shared_ptr output = flat_array; + for (int64_t j = list_depth - 1; j >= 0; j--) { + auto list_type = + ::arrow::list(::arrow::field("item", output->type(), nullable[j + 1])); + output = std::make_shared<::arrow::ListArray>(list_type, list_lengths[j], offsets[j], + output, valid_bits[j], null_counts[j]); } + *inout_array = output; return Status::OK(); } @@ -909,8 +951,7 @@ struct TransferFunctor { using ParquetCType = typename ParquetType::c_type; Status operator()(RecordReader* reader, MemoryPool* pool, - const std::shared_ptr<::arrow::DataType>& type, - std::shared_ptr* out) { + const std::shared_ptr<::arrow::DataType>& type, Datum* out) { static_assert(!std::is_same::value, "The fast path transfer functor should be used " "for primitive values"); @@ -938,8 +979,7 @@ template struct TransferFunctor> { Status operator()(RecordReader* reader, MemoryPool* pool, - const std::shared_ptr<::arrow::DataType>& type, - std::shared_ptr* out) { + const std::shared_ptr<::arrow::DataType>& type, Datum* out) { int64_t length = reader->values_written(); std::shared_ptr values = reader->ReleaseValues(); @@ -957,8 +997,7 @@ struct TransferFunctor struct TransferFunctor<::arrow::BooleanType, BooleanType> { Status operator()(RecordReader* reader, MemoryPool* pool, - const std::shared_ptr<::arrow::DataType>& type, - std::shared_ptr* out) { + const std::shared_ptr<::arrow::DataType>& type, Datum* out) { int64_t length = reader->values_written(); std::shared_ptr data; @@ -991,8 +1030,7 @@ struct TransferFunctor<::arrow::BooleanType, BooleanType> { template <> struct TransferFunctor<::arrow::TimestampType, Int96Type> { Status operator()(RecordReader* reader, MemoryPool* pool, - const std::shared_ptr<::arrow::DataType>& type, - std::shared_ptr* out) { + const std::shared_ptr<::arrow::DataType>& type, Datum* out) { int64_t length = reader->values_written(); auto values = reinterpret_cast(reader->values()); @@ -1001,7 +1039,7 @@ struct TransferFunctor<::arrow::TimestampType, Int96Type> { auto data_ptr = reinterpret_cast(data->mutable_data()); for (int64_t i = 0; i < length; i++) { - *data_ptr++ = impala_timestamp_to_nanoseconds(values[i]); + *data_ptr++ = Int96GetNanoSeconds(values[i]); } if (reader->nullable_values()) { @@ -1019,8 +1057,7 @@ struct TransferFunctor<::arrow::TimestampType, Int96Type> { template <> struct TransferFunctor<::arrow::Date64Type, Int32Type> { Status operator()(RecordReader* reader, MemoryPool* pool, - const std::shared_ptr<::arrow::DataType>& type, - std::shared_ptr* out) { + const std::shared_ptr<::arrow::DataType>& type, Datum* out) { int64_t length = reader->values_written(); auto values = reinterpret_cast(reader->values()); @@ -1029,7 +1066,7 @@ struct TransferFunctor<::arrow::Date64Type, Int32Type> { auto out_ptr = reinterpret_cast(data->mutable_data()); for (int64_t i = 0; i < length; i++) { - *out_ptr++ = static_cast(values[i]) * kMillisecondsInADay; + *out_ptr++ = static_cast(values[i]) * kMillisecondsPerDay; } if (reader->nullable_values()) { @@ -1046,26 +1083,29 @@ struct TransferFunctor<::arrow::Date64Type, Int32Type> { template struct TransferFunctor< ArrowType, ParquetType, - typename std::enable_if::value || - std::is_same::value>::type> { + typename std::enable_if< + (std::is_base_of<::arrow::BinaryType, ArrowType>::value || + std::is_same<::arrow::FixedSizeBinaryType, ArrowType>::value) && + (std::is_same::value || + 
std::is_same::value)>::type> { Status operator()(RecordReader* reader, MemoryPool* pool, - const std::shared_ptr<::arrow::DataType>& type, - std::shared_ptr* out) { - RETURN_NOT_OK(reader->builder()->Finish(out)); + const std::shared_ptr<::arrow::DataType>& type, Datum* out) { + std::vector> chunks = reader->GetBuilderChunks(); if (type->id() == ::arrow::Type::STRING) { // Convert from BINARY type to STRING - auto new_data = (*out)->data()->Copy(); - new_data->type = type; - *out = ::arrow::MakeArray(new_data); + for (size_t i = 0; i < chunks.size(); ++i) { + auto new_data = chunks[i]->data()->Copy(); + new_data->type = type; + chunks[i] = ::arrow::MakeArray(new_data); + } } + *out = std::make_shared(chunks); return Status::OK(); } }; static uint64_t BytesToInteger(const uint8_t* bytes, int32_t start, int32_t stop) { - using ::arrow::BitUtil::FromBigEndian; - const int32_t length = stop - start; DCHECK_GE(length, 0); @@ -1121,37 +1161,54 @@ static constexpr int32_t kMaxDecimalBytes = 16; /// \brief Convert a sequence of big-endian bytes to one int64_t (high bits) and one /// uint64_t (low bits). -static void BytesToIntegerPair(const uint8_t* bytes, - const int32_t total_number_of_bytes_used, int64_t* high, - uint64_t* low) { - DCHECK_GE(total_number_of_bytes_used, kMinDecimalBytes); - DCHECK_LE(total_number_of_bytes_used, kMaxDecimalBytes); - - /// Bytes are coming in big-endian, so the first byte is the MSB and therefore holds the - /// sign bit. - const bool is_negative = static_cast(bytes[0]) < 0; +static void BytesToIntegerPair(const uint8_t* bytes, const int32_t length, + int64_t* out_high, uint64_t* out_low) { + DCHECK_GE(length, kMinDecimalBytes); + DCHECK_LE(length, kMaxDecimalBytes); - /// Sign extend the low bits if necessary - *low = UINT64_MAX * (is_negative && total_number_of_bytes_used < 8); - *high = -1 * (is_negative && total_number_of_bytes_used < kMaxDecimalBytes); + // XXX This code is copied from Decimal::FromBigEndian - /// Stop byte of the high bytes - const int32_t high_bits_offset = std::max(0, total_number_of_bytes_used - 8); + int64_t high, low; - /// Shift left enough bits to make room for the incoming int64_t - *high <<= high_bits_offset * CHAR_BIT; + // Bytes are coming in big-endian, so the first byte is the MSB and therefore holds the + // sign bit. + const bool is_negative = static_cast(bytes[0]) < 0; - /// Preserve the upper bits by inplace OR-ing the int64_t - *high |= BytesToInteger(bytes, 0, high_bits_offset); + // 1. Extract the high bytes + // Stop byte of the high bytes + const int32_t high_bits_offset = std::max(0, length - 8); + const auto high_bits = BytesToInteger(bytes, 0, high_bits_offset); - /// Stop byte of the low bytes - const int32_t low_bits_offset = std::min(total_number_of_bytes_used, 8); + if (high_bits_offset == 8) { + // Avoid undefined shift by 64 below + high = high_bits; + } else { + high = -1 * (is_negative && length < kMaxDecimalBytes); + // Shift left enough bits to make room for the incoming int64_t + high = SafeLeftShift(high, high_bits_offset * CHAR_BIT); + // Preserve the upper bits by inplace OR-ing the int64_t + high |= high_bits; + } + + // 2. 
Extract the low bytes + // Stop byte of the low bytes + const int32_t low_bits_offset = std::min(length, 8); + const auto low_bits = BytesToInteger(bytes, high_bits_offset, length); - /// Shift left enough bits to make room for the incoming uint64_t - *low <<= low_bits_offset * CHAR_BIT; + if (low_bits_offset == 8) { + // Avoid undefined shift by 64 below + low = low_bits; + } else { + // Sign extend the low bits if necessary + low = -1 * (is_negative && length < 8); + // Shift left enough bits to make room for the incoming int64_t + low = SafeLeftShift(low, low_bits_offset * CHAR_BIT); + // Preserve the upper bits by inplace OR-ing the int64_t + low |= low_bits; + } - /// Preserve the upper bits by inplace OR-ing the uint64_t - *low |= BytesToInteger(bytes, high_bits_offset, total_number_of_bytes_used); + *out_high = high; + *out_low = static_cast(low); } static inline void RawBytesToDecimalBytes(const uint8_t* value, int32_t byte_width, @@ -1166,121 +1223,133 @@ static inline void RawBytesToDecimalBytes(const uint8_t* value, int32_t byte_wid BytesToIntegerPair(value, byte_width, high, low); } -/// \brief Convert an array of FixedLenByteArrays to an arrow::Decimal128Array -/// We do this by: -/// 1. Creating a arrow::FixedSizeBinaryArray from the RecordReader's builder -/// 2. Allocating a buffer for the arrow::Decimal128Array -/// 3. Converting the big-endian bytes in the FixedSizeBinaryArray to two integers -/// representing the high and low bits of each decimal value. +// ---------------------------------------------------------------------- +// BYTE_ARRAY / FIXED_LEN_BYTE_ARRAY -> Decimal128 + +template +Status ConvertToDecimal128(const Array& array, const std::shared_ptr<::arrow::DataType>&, + MemoryPool* pool, std::shared_ptr*) { + return Status::NotImplemented("not implemented"); +} + template <> -struct TransferFunctor<::arrow::Decimal128Type, FLBAType> { - Status operator()(RecordReader* reader, MemoryPool* pool, - const std::shared_ptr<::arrow::DataType>& type, - std::shared_ptr* out) { - DCHECK_EQ(type->id(), ::arrow::Type::DECIMAL); +Status ConvertToDecimal128(const Array& array, + const std::shared_ptr<::arrow::DataType>& type, + MemoryPool* pool, std::shared_ptr* out) { + const auto& fixed_size_binary_array = + static_cast(array); - // Finish the built data into a temporary array - std::shared_ptr array; - RETURN_NOT_OK(reader->builder()->Finish(&array)); - const auto& fixed_size_binary_array = - static_cast(*array); + // The byte width of each decimal value + const int32_t type_length = + static_cast(*type).byte_width(); - // Get the byte width of the values in the FixedSizeBinaryArray. Most of the time - // this will be different from the decimal array width because we write the minimum - // number of bytes necessary to represent a given precision - const int32_t byte_width = - static_cast(*fixed_size_binary_array.type()) - .byte_width(); + // number of elements in the entire array + const int64_t length = fixed_size_binary_array.length(); - // The byte width of each decimal value - const int32_t type_length = - static_cast(*type).byte_width(); + // Get the byte width of the values in the FixedSizeBinaryArray. 
Most of the time + // this will be different from the decimal array width because we write the minimum + // number of bytes necessary to represent a given precision + const int32_t byte_width = + static_cast(*fixed_size_binary_array.type()) + .byte_width(); - // number of elements in the entire array - const int64_t length = fixed_size_binary_array.length(); + // allocate memory for the decimal array + std::shared_ptr data; + RETURN_NOT_OK(::arrow::AllocateBuffer(pool, length * type_length, &data)); - // allocate memory for the decimal array - std::shared_ptr data; - RETURN_NOT_OK(::arrow::AllocateBuffer(pool, length * type_length, &data)); - - // raw bytes that we can write to - uint8_t* out_ptr = data->mutable_data(); - - // convert each FixedSizeBinary value to valid decimal bytes - const int64_t null_count = fixed_size_binary_array.null_count(); - if (null_count > 0) { - for (int64_t i = 0; i < length; ++i, out_ptr += type_length) { - if (!fixed_size_binary_array.IsNull(i)) { - RawBytesToDecimalBytes(fixed_size_binary_array.GetValue(i), byte_width, - out_ptr); - } - } - } else { - for (int64_t i = 0; i < length; ++i, out_ptr += type_length) { + // raw bytes that we can write to + uint8_t* out_ptr = data->mutable_data(); + + // convert each FixedSizeBinary value to valid decimal bytes + const int64_t null_count = fixed_size_binary_array.null_count(); + if (null_count > 0) { + for (int64_t i = 0; i < length; ++i, out_ptr += type_length) { + if (!fixed_size_binary_array.IsNull(i)) { RawBytesToDecimalBytes(fixed_size_binary_array.GetValue(i), byte_width, out_ptr); } } - - *out = std::make_shared<::arrow::Decimal128Array>( - type, length, data, fixed_size_binary_array.null_bitmap(), null_count); - return Status::OK(); + } else { + for (int64_t i = 0; i < length; ++i, out_ptr += type_length) { + RawBytesToDecimalBytes(fixed_size_binary_array.GetValue(i), byte_width, out_ptr); + } } -}; -/// \brief Convert an arrow::BinaryArray to an arrow::Decimal128Array -/// We do this by: -/// 1. Creating an arrow::BinaryArray from the RecordReader's builder -/// 2. Allocating a buffer for the arrow::Decimal128Array -/// 3. Converting the big-endian bytes in each BinaryArray entry to two integers -/// representing the high and low bits of each decimal value. 
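Step 3 of those comments, converting big-endian bytes to two integers for the high and low halves, is easiest to see on a narrow value. The following self-contained sketch uses illustrative data rather than the patch's code; with only 3 payload bytes, everything lands in the low word and the high word is pure sign fill, the situation the rewritten BytesToIntegerPair handles with sign fill plus SafeLeftShift:

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t bytes[3] = {0xFF, 0xFE, 0xFF};  // -257 in 24-bit two's complement
  const bool is_negative = static_cast<int8_t>(bytes[0]) < 0;

  // Fewer than 8 bytes: all payload bits land in `low`.
  uint64_t low = 0;
  for (int i = 0; i < 3; ++i) {
    low = (low << 8) | bytes[i];
  }
  if (is_negative) {
    low |= ~uint64_t{0} << 24;  // sign-extend the 24-bit payload to 64 bits
  }
  // `high` carries nothing but sign fill for such narrow values.
  const int64_t high = is_negative ? -1 : 0;

  assert(high == -1);
  assert(low == 0xFFFFFFFFFFFFFEFFULL);
  return 0;
}

This minimal-width storage is also why byte_width and type_length differ in the FLBA path above: a decimal(10, 2) column needs only 5 stored bytes, since 10^10 - 1 fits in the signed range of 40 bits, yet each Decimal128 slot is 16 bytes.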
-template <> -struct TransferFunctor<::arrow::Decimal128Type, ByteArrayType> { - Status operator()(RecordReader* reader, MemoryPool* pool, - const std::shared_ptr<::arrow::DataType>& type, - std::shared_ptr* out) { - DCHECK_EQ(type->id(), ::arrow::Type::DECIMAL); + *out = std::make_shared<::arrow::Decimal128Array>( + type, length, data, fixed_size_binary_array.null_bitmap(), null_count); - // Finish the built data into a temporary array - std::shared_ptr array; - RETURN_NOT_OK(reader->builder()->Finish(&array)); - const auto& binary_array = static_cast(*array); + return Status::OK(); +} - const int64_t length = binary_array.length(); +template <> +Status ConvertToDecimal128(const Array& array, + const std::shared_ptr<::arrow::DataType>& type, + MemoryPool* pool, std::shared_ptr* out) { + const auto& binary_array = static_cast(array); + const int64_t length = binary_array.length(); - const auto& decimal_type = static_cast(*type); - const int64_t type_length = decimal_type.byte_width(); + const auto& decimal_type = static_cast(*type); + const int64_t type_length = decimal_type.byte_width(); - std::shared_ptr data; - RETURN_NOT_OK(::arrow::AllocateBuffer(pool, length * type_length, &data)); + std::shared_ptr data; + RETURN_NOT_OK(::arrow::AllocateBuffer(pool, length * type_length, &data)); - // raw bytes that we can write to - uint8_t* out_ptr = data->mutable_data(); + // raw bytes that we can write to + uint8_t* out_ptr = data->mutable_data(); - const int64_t null_count = binary_array.null_count(); + const int64_t null_count = binary_array.null_count(); - // convert each BinaryArray value to valid decimal bytes - for (int64_t i = 0; i < length; i++, out_ptr += type_length) { - int32_t record_len = 0; - const uint8_t* record_loc = binary_array.GetValue(i, &record_len); + // convert each BinaryArray value to valid decimal bytes + for (int64_t i = 0; i < length; i++, out_ptr += type_length) { + int32_t record_len = 0; + const uint8_t* record_loc = binary_array.GetValue(i, &record_len); - if ((record_len < 0) || (record_len > type_length)) { - return Status::Invalid("Invalid BYTE_ARRAY size"); - } + if ((record_len < 0) || (record_len > type_length)) { + return Status::Invalid("Invalid BYTE_ARRAY size"); + } - auto out_ptr_view = reinterpret_cast(out_ptr); - out_ptr_view[0] = 0; - out_ptr_view[1] = 0; + auto out_ptr_view = reinterpret_cast(out_ptr); + out_ptr_view[0] = 0; + out_ptr_view[1] = 0; - // only convert rows that are not null if there are nulls, or - // all rows, if there are not - if (((null_count > 0) && !binary_array.IsNull(i)) || (null_count <= 0)) { - RawBytesToDecimalBytes(record_loc, record_len, out_ptr); - } + // only convert rows that are not null if there are nulls, or + // all rows, if there are not + if (((null_count > 0) && !binary_array.IsNull(i)) || (null_count <= 0)) { + RawBytesToDecimalBytes(record_loc, record_len, out_ptr); } + } + + *out = std::make_shared<::arrow::Decimal128Array>( + type, length, data, binary_array.null_bitmap(), null_count); + return Status::OK(); +} + +/// \brief Convert an arrow::BinaryArray to an arrow::Decimal128Array +/// We do this by: +/// 1. Creating an arrow::BinaryArray from the RecordReader's builder +/// 2. Allocating a buffer for the arrow::Decimal128Array +/// 3. Converting the big-endian bytes in each BinaryArray entry to two integers +/// representing the high and low bits of each decimal value. 
+template +struct TransferFunctor< + ArrowType, ParquetType, + typename std::enable_if::value && + (std::is_same::value || + std::is_same::value)>::type> { + Status operator()(RecordReader* reader, MemoryPool* pool, + const std::shared_ptr<::arrow::DataType>& type, Datum* out) { + DCHECK_EQ(type->id(), ::arrow::Type::DECIMAL); - *out = std::make_shared<::arrow::Decimal128Array>( - type, length, data, binary_array.null_bitmap(), null_count); + ::arrow::ArrayVector chunks = reader->GetBuilderChunks(); + for (size_t i = 0; i < chunks.size(); ++i) { + std::shared_ptr chunk_as_decimal; + RETURN_NOT_OK( + ConvertToDecimal128(*chunks[i], type, pool, &chunk_as_decimal)); + + // Replace the chunk, which will hopefully also free memory as we go + chunks[i] = chunk_as_decimal; + } + *out = std::make_shared(chunks); return Status::OK(); } }; @@ -1295,7 +1364,7 @@ template ::value>::type> static Status DecimalIntegerTransfer(RecordReader* reader, MemoryPool* pool, const std::shared_ptr<::arrow::DataType>& type, - std::shared_ptr* out) { + Datum* out) { DCHECK_EQ(type->id(), ::arrow::Type::DECIMAL); const int64_t length = reader->values_written(); @@ -1342,8 +1411,7 @@ static Status DecimalIntegerTransfer(RecordReader* reader, MemoryPool* pool, template <> struct TransferFunctor<::arrow::Decimal128Type, Int32Type> { Status operator()(RecordReader* reader, MemoryPool* pool, - const std::shared_ptr<::arrow::DataType>& type, - std::shared_ptr* out) { + const std::shared_ptr<::arrow::DataType>& type, Datum* out) { return DecimalIntegerTransfer(reader, pool, type, out); } }; @@ -1351,23 +1419,23 @@ struct TransferFunctor<::arrow::Decimal128Type, Int32Type> { template <> struct TransferFunctor<::arrow::Decimal128Type, Int64Type> { Status operator()(RecordReader* reader, MemoryPool* pool, - const std::shared_ptr<::arrow::DataType>& type, - std::shared_ptr* out) { + const std::shared_ptr<::arrow::DataType>& type, Datum* out) { return DecimalIntegerTransfer(reader, pool, type, out); } }; -#define TRANSFER_DATA(ArrowType, ParquetType) \ - TransferFunctor func; \ - RETURN_NOT_OK(func(record_reader_.get(), pool_, field_->type(), out)); \ - RETURN_NOT_OK(WrapIntoListArray(out)) +#define TRANSFER_DATA(ArrowType, ParquetType) \ + TransferFunctor func; \ + RETURN_NOT_OK(func(record_reader_.get(), pool_, field_->type(), &result)); \ + RETURN_NOT_OK(WrapIntoListArray(&result)) #define TRANSFER_CASE(ENUM, ArrowType, ParquetType) \ case ::arrow::Type::ENUM: { \ TRANSFER_DATA(ArrowType, ParquetType); \ } break; -Status PrimitiveImpl::NextBatch(int64_t records_to_read, std::shared_ptr* out) { +Status PrimitiveImpl::NextBatch(int64_t records_to_read, + std::shared_ptr* out) { try { // Pre-allocation gives much better performance for flat columns record_reader_->Reserve(records_to_read); @@ -1387,6 +1455,7 @@ Status PrimitiveImpl::NextBatch(int64_t records_to_read, std::shared_ptr* return ::arrow::Status::IOError(e.what()); } + Datum result; switch (field_->type()->id()) { TRANSFER_CASE(BOOL, ::arrow::BooleanType, BooleanType) TRANSFER_CASE(UINT8, ::arrow::UInt8Type, Int32Type) @@ -1405,8 +1474,8 @@ Status PrimitiveImpl::NextBatch(int64_t records_to_read, std::shared_ptr* TRANSFER_CASE(DATE64, ::arrow::Date64Type, Int32Type) TRANSFER_CASE(FIXED_SIZE_BINARY, ::arrow::FixedSizeBinaryType, FLBAType) case ::arrow::Type::NA: { - *out = std::make_shared<::arrow::NullArray>(record_reader_->values_written()); - RETURN_NOT_OK(WrapIntoListArray(out)); + result = std::make_shared<::arrow::NullArray>(record_reader_->values_written()); + 
RETURN_NOT_OK(WrapIntoListArray(&result)); break; } case ::arrow::Type::DECIMAL: { @@ -1447,11 +1516,19 @@ Status PrimitiveImpl::NextBatch(int64_t records_to_read, std::shared_ptr* TRANSFER_CASE(TIME32, ::arrow::Time32Type, Int32Type) TRANSFER_CASE(TIME64, ::arrow::Time64Type, Int64Type) default: - std::stringstream ss; - ss << "No support for reading columns of type " << field_->type()->ToString(); - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("No support for reading columns of type ", + field_->type()->ToString()); } + DCHECK_NE(result.kind(), Datum::NONE); + + if (result.kind() == Datum::ARRAY) { + *out = std::make_shared(result.make_array()); + } else if (result.kind() == Datum::CHUNKED_ARRAY) { + *out = result.chunked_array(); + } else { + DCHECK(false) << "Should be impossible"; + } return Status::OK(); } @@ -1477,10 +1554,17 @@ ColumnReader::ColumnReader(std::unique_ptr impl) ColumnReader::~ColumnReader() {} -Status ColumnReader::NextBatch(int64_t records_to_read, std::shared_ptr* out) { +Status ColumnReader::NextBatch(int64_t records_to_read, + std::shared_ptr* out) { return impl_->NextBatch(records_to_read, out); } +Status ColumnReader::NextBatch(int64_t records_to_read, std::shared_ptr* out) { + std::shared_ptr chunked_out; + RETURN_NOT_OK(impl_->NextBatch(records_to_read, &chunked_out)); + return GetSingleChunk(*chunked_out, out); +} + // StructImpl methods Status StructImpl::DefLevelsToNullArray(std::shared_ptr* null_bitmap_out, @@ -1565,17 +1649,21 @@ Status StructImpl::GetRepLevels(const int16_t** data, size_t* length) { return Status::NotImplemented("GetRepLevels is not implemented for struct"); } -Status StructImpl::NextBatch(int64_t records_to_read, std::shared_ptr* out) { +Status StructImpl::NextBatch(int64_t records_to_read, + std::shared_ptr* out) { std::vector> children_arrays; std::shared_ptr null_bitmap; int64_t null_count; // Gather children arrays and def levels for (auto& child : children_) { - std::shared_ptr child_array; + std::shared_ptr field; + RETURN_NOT_OK(child->NextBatch(records_to_read, &field)); - RETURN_NOT_OK(child->NextBatch(records_to_read, &child_array)); - children_arrays.push_back(child_array); + if (field->num_chunks() > 1) { + return Status::Invalid("Chunked field reads not yet supported with StructArray"); + } + children_arrays.push_back(field->chunk(0)); } RETURN_NOT_OK(DefLevelsToNullArray(&null_bitmap, &null_count)); @@ -1589,8 +1677,9 @@ Status StructImpl::NextBatch(int64_t records_to_read, std::shared_ptr* ou } } - *out = std::make_shared(field()->type(), struct_length, children_arrays, - null_bitmap, null_count); + auto result = std::make_shared(field()->type(), struct_length, + children_arrays, null_bitmap, null_count); + *out = std::make_shared(result); return Status::OK(); } @@ -1613,10 +1702,16 @@ RowGroupReader::~RowGroupReader() {} RowGroupReader::RowGroupReader(FileReader::Impl* impl, int row_group_index) : impl_(impl), row_group_index_(row_group_index) {} -Status ColumnChunkReader::Read(std::shared_ptr<::arrow::Array>* out) { +Status ColumnChunkReader::Read(std::shared_ptr<::arrow::ChunkedArray>* out) { return impl_->ReadColumnChunk(column_index_, row_group_index_, out); } +Status ColumnChunkReader::Read(std::shared_ptr<::arrow::Array>* out) { + std::shared_ptr chunked_out; + RETURN_NOT_OK(impl_->ReadColumnChunk(column_index_, row_group_index_, &chunked_out)); + return GetSingleChunk(*chunked_out, out); +} + ColumnChunkReader::~ColumnChunkReader() {} 
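Taken together, the changes above give every read entry point (FileReader::ReadColumn and ReadSchemaField, ColumnChunkReader::Read, ColumnReader::NextBatch) a ChunkedArray-returning overload, with the old Array overloads reduced to shims over GetSingleChunk. A minimal caller sketch, assuming an already-constructed FileReader and eliding setup:

#include <memory>

#include "arrow/array.h"
#include "arrow/status.h"
#include "arrow/table.h"  // ChunkedArray lives here in this era
#include "parquet/arrow/reader.h"

::arrow::Status ReadFirstColumn(parquet::arrow::FileReader* reader) {
  // New API: a large BYTE_ARRAY column may come back as several chunks.
  std::shared_ptr<::arrow::ChunkedArray> chunked;
  ::arrow::Status st = reader->ReadColumn(0, &chunked);
  if (!st.ok()) return st;

  // Deprecated API: still compiles (with a deprecation warning), but now
  // fails with Status::Invalid when more than one chunk was produced.
  std::shared_ptr<::arrow::Array> array;
  return reader->ReadColumn(0, &array);
}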
ColumnChunkReader::ColumnChunkReader(FileReader::Impl* impl, int row_group_index, diff --git a/cpp/src/parquet/arrow/reader.h b/cpp/src/parquet/arrow/reader.h index 2cd94ca28fdcb..5286e742b08c1 100644 --- a/cpp/src/parquet/arrow/reader.h +++ b/cpp/src/parquet/arrow/reader.h @@ -30,6 +30,7 @@ namespace arrow { class Array; +class ChunkedArray; class MemoryPool; class RecordBatchReader; class Schema; @@ -125,6 +126,10 @@ class PARQUET_EXPORT FileReader { std::shared_ptr<::arrow::Schema>* out); // Read column as a whole into an Array. + ::arrow::Status ReadColumn(int i, std::shared_ptr<::arrow::ChunkedArray>* out); + + /// \note Deprecated since 0.12 + ARROW_DEPRECATED("Use version with ChunkedArray output") ::arrow::Status ReadColumn(int i, std::shared_ptr<::arrow::Array>* out); // NOTE: Experimental API @@ -139,27 +144,11 @@ class PARQUET_EXPORT FileReader { // 2 foo3 // // i=0 will read the entire foo struct, i=1 the foo2 primitive column etc - ::arrow::Status ReadSchemaField(int i, std::shared_ptr<::arrow::Array>* out); + ::arrow::Status ReadSchemaField(int i, std::shared_ptr<::arrow::ChunkedArray>* out); - // NOTE: Experimental API - // Reads a specific top level schema field into an Array, while keeping only chosen - // leaf columns. - // The index i refers the index of the top level schema field, which may - // be nested or flat, and indices vector refers to the leaf column indices - e.g. - // - // i indices - // 0 0 foo.bar - // 0 1 foo.bar.baz - // 0 2 foo.qux - // 1 3 foo2 - // 2 4 foo3 - // - // i=0 indices={0,2} will read a partial struct with foo.bar and foo.quox columns - // i=1 indices={3} will read foo2 column - // i=1 indices={2} will result in out=nullptr - // leaf indices which are unrelated to the schema field are ignored - ::arrow::Status ReadSchemaField(int i, const std::vector& indices, - std::shared_ptr<::arrow::Array>* out); + /// \note Deprecated since 0.12 + ARROW_DEPRECATED("Use version with ChunkedArray output") + ::arrow::Status ReadSchemaField(int i, std::shared_ptr<::arrow::Array>* out); /// \brief Return a RecordBatchReader of row groups selected from row_group_indices, the /// ordering in row_group_indices matters. @@ -248,6 +237,10 @@ class PARQUET_EXPORT RowGroupReader { class PARQUET_EXPORT ColumnChunkReader { public: + ::arrow::Status Read(std::shared_ptr<::arrow::ChunkedArray>* out); + + /// \note Deprecated since 0.12 + ARROW_DEPRECATED("Use version with ChunkedArray output") ::arrow::Status Read(std::shared_ptr<::arrow::Array>* out); virtual ~ColumnChunkReader(); @@ -281,6 +274,11 @@ class PARQUET_EXPORT ColumnReader { // // Returns Status::OK on a successful read, including if you have exhausted // the data available in the file. + ::arrow::Status NextBatch(int64_t batch_size, + std::shared_ptr<::arrow::ChunkedArray>* out); + + /// \note Deprecated since 0.12 + ARROW_DEPRECATED("Use version with ChunkedArray output") ::arrow::Status NextBatch(int64_t batch_size, std::shared_ptr<::arrow::Array>* out); private: diff --git a/cpp/src/parquet/arrow/record_reader.cc b/cpp/src/parquet/arrow/record_reader.cc index ce6fa2a5b91bb..39945afc78298 100644 --- a/cpp/src/parquet/arrow/record_reader.cc +++ b/cpp/src/parquet/arrow/record_reader.cc @@ -1,4 +1,4 @@ -// licensed to the Apache Software Foundation (ASF) under one +// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. 
The ASF licenses this file @@ -20,26 +20,22 @@ #include #include #include +#include #include -#include #include #include +#include "arrow/array.h" #include "arrow/buffer.h" #include "arrow/builder.h" -#include "arrow/memory_pool.h" -#include "arrow/status.h" #include "arrow/type.h" #include "arrow/util/bit-util.h" #include "arrow/util/logging.h" -#include "arrow/util/rle-encoding.h" #include "parquet/column_page.h" #include "parquet/column_reader.h" -#include "parquet/encoding-internal.h" #include "parquet/encoding.h" #include "parquet/exception.h" -#include "parquet/properties.h" #include "parquet/schema.h" #include "parquet/types.h" @@ -50,15 +46,16 @@ namespace internal { namespace BitUtil = ::arrow::BitUtil; -template -class TypedRecordReader; - // PLAIN_DICTIONARY is deprecated but used to be used as a dictionary index // encoding. static bool IsDictionaryIndexEncoding(const Encoding::type& e) { return e == Encoding::RLE_DICTIONARY || e == Encoding::PLAIN_DICTIONARY; } +// The minimum number of repetition/definition levels to decode at a time, for +// better vectorized performance when doing many smaller record reads +constexpr int64_t kMinLevelBatchSize = 1024; + class RecordReader::RecordReaderImpl { public: RecordReaderImpl(const ColumnDescriptor* descr, MemoryPool* pool) @@ -75,26 +72,102 @@ class RecordReader::RecordReaderImpl { null_count_(0), levels_written_(0), levels_position_(0), - levels_capacity_(0) { + levels_capacity_(0), + uses_values_(!(descr->physical_type() == Type::BYTE_ARRAY)) { nullable_values_ = internal::HasSpacedValues(descr); - values_ = AllocateBuffer(pool); + if (uses_values_) { + values_ = AllocateBuffer(pool); + } valid_bits_ = AllocateBuffer(pool); def_levels_ = AllocateBuffer(pool); rep_levels_ = AllocateBuffer(pool); - - if (descr->physical_type() == Type::BYTE_ARRAY) { - builder_.reset(new ::arrow::BinaryBuilder(pool)); - } else if (descr->physical_type() == Type::FIXED_LEN_BYTE_ARRAY) { - int byte_width = descr->type_length(); - std::shared_ptr<::arrow::DataType> type = ::arrow::fixed_size_binary(byte_width); - builder_.reset(new ::arrow::FixedSizeBinaryBuilder(type, pool)); - } Reset(); } virtual ~RecordReaderImpl() = default; - virtual int64_t ReadRecords(int64_t num_records) = 0; + virtual int64_t ReadRecordData(const int64_t num_records) = 0; + + // Returns true if there are still values in this column. + bool HasNext() { + // Either there is no data page available yet, or the data page has been + // exhausted + if (num_buffered_values_ == 0 || num_decoded_values_ == num_buffered_values_) { + if (!ReadNewPage() || num_buffered_values_ == 0) { + return false; + } + } + return true; + } + + int64_t ReadRecords(int64_t num_records) { + // Delimit records, then read values at the end + int64_t records_read = 0; + + if (levels_position_ < levels_written_) { + records_read += ReadRecordData(num_records); + } + + int64_t level_batch_size = std::max(kMinLevelBatchSize, num_records); + + // If we are in the middle of a record, we continue until reaching the + // desired number of records or the end of the current record if we've found + // enough records + while (!at_record_start_ || records_read < num_records) { + // Is there more data to read in this row group? + if (!HasNext()) { + if (!at_record_start_) { + // We ended the row group while inside a record that we haven't seen + // the end of yet. 
So increment the record count for the last record in + // the row group + ++records_read; + at_record_start_ = true; + } + break; + } + + /// We perform multiple batch reads until we either exhaust the row group + /// or observe the desired number of records + int64_t batch_size = std::min(level_batch_size, available_values_current_page()); + + // No more data in column + if (batch_size == 0) { + break; + } + + if (max_def_level_ > 0) { + ReserveLevels(batch_size); + + int16_t* def_levels = this->def_levels() + levels_written_; + int16_t* rep_levels = this->rep_levels() + levels_written_; + + // Not present for non-repeated fields + int64_t levels_read = 0; + if (max_rep_level_ > 0) { + levels_read = ReadDefinitionLevels(batch_size, def_levels); + if (ReadRepetitionLevels(batch_size, rep_levels) != levels_read) { + throw ParquetException("Number of decoded rep / def levels did not match"); + } + } else if (max_def_level_ > 0) { + levels_read = ReadDefinitionLevels(batch_size, def_levels); + } + + // Exhausted column chunk + if (levels_read == 0) { + break; + } + + levels_written_ += levels_read; + records_read += ReadRecordData(num_records - records_read); + } else { + // No repetition or definition levels + batch_size = std::min(num_records - records_read, batch_size); + records_read += ReadRecordData(batch_size); + } + } + + return records_read; + } // Dictionary decoders must be reset when advancing row groups virtual void ResetDecoders() = 0; @@ -132,9 +205,13 @@ class RecordReader::RecordReaderImpl { bool nullable_values() const { return nullable_values_; } std::shared_ptr ReleaseValues() { - auto result = values_; - values_ = AllocateBuffer(pool_); - return result; + if (uses_values_) { + auto result = values_; + values_ = AllocateBuffer(pool_); + return result; + } else { + return nullptr; + } } std::shared_ptr ReleaseIsValid() { @@ -143,8 +220,6 @@ class RecordReader::RecordReaderImpl { return result; } - ::arrow::ArrayBuilder* builder() { return builder_.get(); } - // Process written repetition/definition levels to reach the end of // records. Process no more levels than necessary to delimit the indicated // number of logical records. 
Updates internal state of RecordReader @@ -248,7 +323,13 @@ class RecordReader::RecordReaderImpl { } int type_size = GetTypeByteSize(descr_->physical_type()); - PARQUET_THROW_NOT_OK(values_->Resize(new_values_capacity * type_size, false)); + + // XXX(wesm): A hack to avoid memory allocation when reading directly + // into builder classes + if (uses_values_) { + PARQUET_THROW_NOT_OK(values_->Resize(new_values_capacity * type_size, false)); + } + values_capacity_ = new_values_capacity; } if (nullable_values_) { @@ -289,13 +370,15 @@ class RecordReader::RecordReaderImpl { records_read_ = 0; - // Calling Finish on the builders also resets them + // Call Finish on the binary builders to reset them } void ResetValues() { if (values_written_ > 0) { // Resize to 0, but do not shrink to fit - PARQUET_THROW_NOT_OK(values_->Resize(0, false)); + if (uses_values_) { + PARQUET_THROW_NOT_OK(values_->Resize(0, false)); + } PARQUET_THROW_NOT_OK(valid_bits_->Resize(0, false)); values_written_ = 0; values_capacity_ = 0; @@ -303,7 +386,13 @@ class RecordReader::RecordReaderImpl { } } + virtual void DebugPrintState() = 0; + + virtual std::vector> GetBuilderChunks() = 0; + protected: + virtual bool ReadNewPage() = 0; + const ColumnDescriptor* descr_; ::arrow::MemoryPool* pool_; @@ -344,10 +433,10 @@ class RecordReader::RecordReaderImpl { int64_t levels_position_; int64_t levels_capacity_; - // TODO(wesm): ByteArray / FixedLenByteArray types - std::unique_ptr<::arrow::ArrayBuilder> builder_; - std::shared_ptr<::arrow::ResizableBuffer> values_; + // In the case of false, don't allocate the values buffer (when we directly read into + // builder classes). + bool uses_values_; template T* ValuesHead() { @@ -359,17 +448,32 @@ class RecordReader::RecordReaderImpl { std::shared_ptr<::arrow::ResizableBuffer> rep_levels_; }; -// The minimum number of repetition/definition levels to decode at a time, for -// better vectorized performance when doing many smaller record reads -constexpr int64_t kMinLevelBatchSize = 1024; +template +struct RecordReaderTraits { + using BuilderType = ::arrow::ArrayBuilder; +}; + +template <> +struct RecordReaderTraits { + using BuilderType = ::arrow::internal::ChunkedBinaryBuilder; +}; + +template <> +struct RecordReaderTraits { + using BuilderType = ::arrow::FixedSizeBinaryBuilder; +}; template class TypedRecordReader : public RecordReader::RecordReaderImpl { public: - typedef typename DType::c_type T; + using T = typename DType::c_type; - TypedRecordReader(const ColumnDescriptor* schema, ::arrow::MemoryPool* pool) - : RecordReader::RecordReaderImpl(schema, pool), current_decoder_(nullptr) {} + using BuilderType = typename RecordReaderTraits::BuilderType; + + TypedRecordReader(const ColumnDescriptor* descr, ::arrow::MemoryPool* pool) + : RecordReader::RecordReaderImpl(descr, pool), current_decoder_(nullptr) { + InitializeBuilder(); + } void ResetDecoders() override { decoders_.clear(); } @@ -390,7 +494,7 @@ class TypedRecordReader : public RecordReader::RecordReaderImpl { } // Return number of logical records read - int64_t ReadRecordData(const int64_t num_records) { + int64_t ReadRecordData(const int64_t num_records) override { // Conservative upper bound const int64_t possible_num_values = std::max(num_records, levels_written_ - levels_position_); @@ -434,115 +538,101 @@ class TypedRecordReader : public RecordReader::RecordReaderImpl { return records_read; } - // Returns true if there are still values in this column. 
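A consequence of the new uses_values_ flag that callers must handle: for BYTE_ARRAY columns no values buffer is allocated, so ReleaseValues() returns nullptr and the decoded strings are exposed through builder chunks instead. A hedged caller sketch, assuming a parquet::internal::RecordReader* named record_reader (the local names are ours; ReleaseValues and GetBuilderChunks are the patch's API):

// Hypothetical post-read handling covering both storage layouts.
auto values = record_reader->ReleaseValues();
if (values != nullptr) {
  // Fixed-width physical type: reinterpret the flat buffer directly.
} else {
  // BYTE_ARRAY: data accumulated in the builder; take it as Array chunks.
  ::arrow::ArrayVector chunks = record_reader->GetBuilderChunks();
  static_cast<void>(chunks);
}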
- bool HasNext() { - // Either there is no data page available yet, or the data page has been - // exhausted - if (num_buffered_values_ == 0 || num_decoded_values_ == num_buffered_values_) { - if (!ReadNewPage() || num_buffered_values_ == 0) { - return false; - } - } - return true; - } + void DebugPrintState() override { + const int16_t* def_levels = this->def_levels(); + const int16_t* rep_levels = this->rep_levels(); + const int64_t total_levels_read = levels_position_; - int64_t ReadRecords(int64_t num_records) override { - // Delimit records, then read values at the end - int64_t records_read = 0; + const T* values = reinterpret_cast(this->values()); - if (levels_position_ < levels_written_) { - records_read += ReadRecordData(num_records); + std::cout << "def levels: "; + for (int64_t i = 0; i < total_levels_read; ++i) { + std::cout << def_levels[i] << " "; } + std::cout << std::endl; - int64_t level_batch_size = std::max(kMinLevelBatchSize, num_records); - - // If we are in the middle of a record, we continue until reaching the - // desired number of records or the end of the current record if we've found - // enough records - while (!at_record_start_ || records_read < num_records) { - // Is there more data to read in this row group? - if (!HasNext()) { - if (!at_record_start_) { - // We ended the row group while inside a record that we haven't seen - // the end of yet. So increment the record count for the last record in - // the row group - ++records_read; - at_record_start_ = true; - } - break; - } - - /// We perform multiple batch reads until we either exhaust the row group - /// or observe the desired number of records - int64_t batch_size = std::min(level_batch_size, available_values_current_page()); - - // No more data in column - if (batch_size == 0) { - break; - } - - if (max_def_level_ > 0) { - ReserveLevels(batch_size); - - int16_t* def_levels = this->def_levels() + levels_written_; - int16_t* rep_levels = this->rep_levels() + levels_written_; - - // Not present for non-repeated fields - int64_t levels_read = 0; - if (max_rep_level_ > 0) { - levels_read = ReadDefinitionLevels(batch_size, def_levels); - if (ReadRepetitionLevels(batch_size, rep_levels) != levels_read) { - throw ParquetException("Number of decoded rep / def levels did not match"); - } - } else if (max_def_level_ > 0) { - levels_read = ReadDefinitionLevels(batch_size, def_levels); - } - - // Exhausted column chunk - if (levels_read == 0) { - break; - } + std::cout << "rep levels: "; + for (int64_t i = 0; i < total_levels_read; ++i) { + std::cout << rep_levels[i] << " "; + } + std::cout << std::endl; - levels_written_ += levels_read; - records_read += ReadRecordData(num_records - records_read); - } else { - // No repetition or definition levels - batch_size = std::min(num_records - records_read, batch_size); - records_read += ReadRecordData(batch_size); - } + std::cout << "values: "; + for (int64_t i = 0; i < this->values_written(); ++i) { + std::cout << values[i] << " "; } + std::cout << std::endl; + } - return records_read; + std::vector> GetBuilderChunks() override { + throw ParquetException("GetChunks only implemented for binary types"); } private: - typedef Decoder DecoderType; + using DecoderType = typename EncodingTraits::Decoder; // Map of encoding type to the respective decoder object. For example, a // column chunk's data pages may include both dictionary-encoded and // plain-encoded data. 
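GetBuilderChunks is deliberately binary-only (note the ParquetException above): fixed-width columns keep flowing through the raw values buffer, while BYTE_ARRAY data accumulates in ::arrow::internal::ChunkedBinaryBuilder, which the InitializeBuilder specialization below caps at 1 << 24 bytes (16 MiB) per chunk, comfortably inside BinaryArray's int32 offset limit. A toy accumulator, illustrative only, showing the same roll-over policy:

#include <cstdint>
#include <string>
#include <utility>
#include <vector>

// Illustrative stand-in for ChunkedBinaryBuilder's policy: once the current
// chunk holds kChunkLimit bytes of character data, seal it and start a new
// one, so no single chunk's offsets approach the int32 ceiling.
class ChunkedAccumulator {
 public:
  static constexpr int64_t kChunkLimit = int64_t{1} << 24;  // 16 MiB

  void Append(std::string value) {
    if (!current_.empty() &&
        current_bytes_ + static_cast<int64_t>(value.size()) > kChunkLimit) {
      chunks_.push_back(std::move(current_));
      current_.clear();
      current_bytes_ = 0;
    }
    current_bytes_ += static_cast<int64_t>(value.size());
    current_.push_back(std::move(value));
  }

  std::vector<std::vector<std::string>> Finish() {
    if (!current_.empty()) {
      chunks_.push_back(std::move(current_));
    }
    return std::move(chunks_);
  }

 private:
  std::vector<std::vector<std::string>> chunks_;
  std::vector<std::string> current_;
  int64_t current_bytes_ = 0;
};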
- std::unordered_map> decoders_; + std::unordered_map> decoders_; + + std::unique_ptr builder_; DecoderType* current_decoder_; // Advance to the next data page - bool ReadNewPage(); + bool ReadNewPage() override; + + void InitializeBuilder() {} void ConfigureDictionary(const DictionaryPage* page); }; +// TODO(wesm): Implement these to some satisfaction +template <> +void TypedRecordReader::DebugPrintState() {} + +template <> +void TypedRecordReader::DebugPrintState() {} + +template <> +void TypedRecordReader::DebugPrintState() {} + +template <> +void TypedRecordReader::InitializeBuilder() { + // Maximum of 16MB chunks + constexpr int32_t kBinaryChunksize = 1 << 24; + DCHECK_EQ(descr_->physical_type(), Type::BYTE_ARRAY); + builder_.reset(new ::arrow::internal::ChunkedBinaryBuilder(kBinaryChunksize, pool_)); +} + +template <> +void TypedRecordReader::InitializeBuilder() { + DCHECK_EQ(descr_->physical_type(), Type::FIXED_LEN_BYTE_ARRAY); + int byte_width = descr_->type_length(); + std::shared_ptr<::arrow::DataType> type = ::arrow::fixed_size_binary(byte_width); + builder_.reset(new ::arrow::FixedSizeBinaryBuilder(type, pool_)); +} + +template <> +::arrow::ArrayVector TypedRecordReader::GetBuilderChunks() { + ::arrow::ArrayVector chunks; + PARQUET_THROW_NOT_OK(builder_->Finish(&chunks)); + return chunks; +} + +template <> +::arrow::ArrayVector TypedRecordReader::GetBuilderChunks() { + std::shared_ptr<::arrow::Array> chunk; + PARQUET_THROW_NOT_OK(builder_->Finish(&chunk)); + return ::arrow::ArrayVector({chunk}); +} + template <> inline void TypedRecordReader::ReadValuesDense(int64_t values_to_read) { - auto values = ValuesHead(); - int64_t num_decoded = - current_decoder_->Decode(values, static_cast(values_to_read)); + int64_t num_decoded = current_decoder_->DecodeArrowNonNull( + static_cast(values_to_read), builder_.get()); DCHECK_EQ(num_decoded, values_to_read); - - auto builder = static_cast<::arrow::BinaryBuilder*>(builder_.get()); - for (int64_t i = 0; i < num_decoded; i++) { - PARQUET_THROW_NOT_OK( - builder->Append(values[i].ptr, static_cast(values[i].len))); - } ResetValues(); } @@ -553,9 +643,8 @@ inline void TypedRecordReader::ReadValuesDense(int64_t values_to_read) current_decoder_->Decode(values, static_cast(values_to_read)); DCHECK_EQ(num_decoded, values_to_read); - auto builder = static_cast<::arrow::FixedSizeBinaryBuilder*>(builder_.get()); for (int64_t i = 0; i < num_decoded; i++) { - PARQUET_THROW_NOT_OK(builder->Append(values[i].ptr)); + PARQUET_THROW_NOT_OK(builder_->Append(values[i].ptr)); } ResetValues(); } @@ -563,25 +652,10 @@ inline void TypedRecordReader::ReadValuesDense(int64_t values_to_read) template <> inline void TypedRecordReader::ReadValuesSpaced(int64_t values_to_read, int64_t null_count) { - uint8_t* valid_bits = valid_bits_->mutable_data(); - const int64_t valid_bits_offset = values_written_; - auto values = ValuesHead(); - - int64_t num_decoded = current_decoder_->DecodeSpaced( - values, static_cast(values_to_read), static_cast(null_count), valid_bits, - valid_bits_offset); + int64_t num_decoded = current_decoder_->DecodeArrow( + static_cast(values_to_read), static_cast(null_count), + valid_bits_->mutable_data(), values_written_, builder_.get()); DCHECK_EQ(num_decoded, values_to_read); - - auto builder = static_cast<::arrow::BinaryBuilder*>(builder_.get()); - - for (int64_t i = 0; i < num_decoded; i++) { - if (::arrow::BitUtil::GetBit(valid_bits, valid_bits_offset + i)) { - PARQUET_THROW_NOT_OK( - builder->Append(values[i].ptr, static_cast(values[i].len))); - 
} else { - PARQUET_THROW_NOT_OK(builder->AppendNull()); - } - } ResetValues(); } @@ -597,12 +671,11 @@ inline void TypedRecordReader::ReadValuesSpaced(int64_t values_to_read valid_bits_offset); DCHECK_EQ(num_decoded, values_to_read); - auto builder = static_cast<::arrow::FixedSizeBinaryBuilder*>(builder_.get()); for (int64_t i = 0; i < num_decoded; i++) { if (::arrow::BitUtil::GetBit(valid_bits, valid_bits_offset + i)) { - PARQUET_THROW_NOT_OK(builder->Append(values[i].ptr)); + PARQUET_THROW_NOT_OK(builder_->Append(values[i].ptr)); } else { - PARQUET_THROW_NOT_OK(builder->AppendNull()); + PARQUET_THROW_NOT_OK(builder_->AppendNull()); } } ResetValues(); @@ -623,8 +696,8 @@ inline void TypedRecordReader::ConfigureDictionary(const DictionaryPage* if (page->encoding() == Encoding::PLAIN_DICTIONARY || page->encoding() == Encoding::PLAIN) { - PlainDecoder dictionary(descr_); - dictionary.SetData(page->num_values(), page->data(), page->size()); + auto dictionary = MakeTypedDecoder(Encoding::PLAIN, descr_); + dictionary->SetData(page->num_values(), page->data(), page->size()); // The dictionary is fully decoded during DictionaryDecoder::Init, so the // DictionaryPage buffer is no longer required after this step @@ -632,14 +705,16 @@ inline void TypedRecordReader::ConfigureDictionary(const DictionaryPage* // TODO(wesm): investigate whether this all-or-nothing decoding of the // dictionary makes sense and whether performance can be improved - auto decoder = std::make_shared>(descr_, pool_); - decoder->SetDict(&dictionary); - decoders_[encoding] = decoder; + std::unique_ptr> decoder = MakeDictDecoder(descr_, pool_); + decoder->SetDict(dictionary.get()); + decoders_[encoding] = + std::unique_ptr(dynamic_cast(decoder.release())); } else { ParquetException::NYI("only plain dictionary encoding has been implemented"); } current_decoder_ = decoders_[encoding].get(); + DCHECK(current_decoder_); } template @@ -705,6 +780,7 @@ bool TypedRecordReader::ReadNewPage() { auto it = decoders_.find(static_cast(encoding)); if (it != decoders_.end()) { + DCHECK(it->second.get() != nullptr); if (encoding == Encoding::RLE_DICTIONARY) { DCHECK(current_decoder_->encoding() == Encoding::RLE_DICTIONARY); } @@ -712,9 +788,9 @@ bool TypedRecordReader::ReadNewPage() { } else { switch (encoding) { case Encoding::PLAIN: { - std::shared_ptr decoder(new PlainDecoder(descr_)); - decoders_[static_cast(encoding)] = decoder; + auto decoder = MakeTypedDecoder(Encoding::PLAIN, descr_); current_decoder_ = decoder.get(); + decoders_[static_cast(encoding)] = std::move(decoder); break; } case Encoding::RLE_DICTIONARY: @@ -768,8 +844,12 @@ std::shared_ptr RecordReader::Make(const ColumnDescriptor* descr, case Type::FIXED_LEN_BYTE_ARRAY: return std::shared_ptr( new RecordReader(new TypedRecordReader(descr, pool))); - default: - DCHECK(false); + default: { + // PARQUET-1481: This can occur if the file is corrupt + std::stringstream ss; + ss << "Invalid physical column type: " << static_cast(descr->physical_type()); + throw ParquetException(ss.str()); + } } // Unreachable code, but supress compiler warning return nullptr; @@ -804,8 +884,6 @@ std::shared_ptr RecordReader::ReleaseIsValid() { return impl_->ReleaseIsValid(); } -::arrow::ArrayBuilder* RecordReader::builder() { return impl_->builder(); } - int64_t RecordReader::values_written() const { return impl_->values_written(); } int64_t RecordReader::levels_position() const { return impl_->levels_position(); } @@ -822,5 +900,11 @@ void RecordReader::SetPageReader(std::unique_ptr reader) { 
impl_->SetPageReader(std::move(reader)); } +::arrow::ArrayVector RecordReader::GetBuilderChunks() { + return impl_->GetBuilderChunks(); +} + +void RecordReader::DebugPrintState() { impl_->DebugPrintState(); } + } // namespace internal } // namespace parquet diff --git a/cpp/src/parquet/arrow/record_reader.h b/cpp/src/parquet/arrow/record_reader.h index 8da0709997026..cc932c2865028 100644 --- a/cpp/src/parquet/arrow/record_reader.h +++ b/cpp/src/parquet/arrow/record_reader.h @@ -20,15 +20,15 @@ #include #include +#include #include "arrow/memory_pool.h" -#include "parquet/util/macros.h" #include "parquet/util/memory.h" namespace arrow { -class ArrayBuilder; +class Array; } // namespace arrow @@ -77,7 +77,6 @@ class RecordReader { std::shared_ptr ReleaseValues(); std::shared_ptr ReleaseIsValid(); - ::arrow::ArrayBuilder* builder(); /// \brief Number of values written including nulls (if any) int64_t values_written() const; @@ -104,6 +103,11 @@ class RecordReader { /// \param[in] reader obtained from RowGroupReader::GetColumnPageReader void SetPageReader(std::unique_ptr reader); + void DebugPrintState(); + + // For BYTE_ARRAY, FIXED_LEN_BYTE_ARRAY types that may have chunked output + std::vector> GetBuilderChunks(); + private: std::unique_ptr impl_; explicit RecordReader(RecordReaderImpl* impl); diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc index d0014a6f3aa2a..f1ebad0e5667f 100644 --- a/cpp/src/parquet/arrow/schema.cc +++ b/cpp/src/parquet/arrow/schema.cc @@ -19,14 +19,20 @@ #include #include +#include #include -#include "parquet/api/schema.h" -#include "parquet/util/schema-util.h" - -#include "arrow/api.h" +#include "arrow/array.h" +#include "arrow/status.h" +#include "arrow/type.h" #include "arrow/util/logging.h" +#include "parquet/arrow/writer.h" +#include "parquet/exception.h" +#include "parquet/properties.h" +#include "parquet/types.h" +#include "parquet/util/schema-util.h" + using arrow::Field; using arrow::Status; @@ -80,10 +86,9 @@ static Status FromFLBA(const PrimitiveNode& node, std::shared_ptr* ou *out = MakeDecimal128Type(node); break; default: - std::stringstream ss; - ss << "Unhandled logical type " << LogicalTypeToString(node.logical_type()) - << " for fixed-length binary array"; - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("Unhandled logical type ", + LogicalTypeToString(node.logical_type()), + " for fixed-length binary array"); } return Status::OK(); @@ -122,10 +127,9 @@ static Status FromInt32(const PrimitiveNode& node, std::shared_ptr* o *out = MakeDecimal128Type(node); break; default: - std::stringstream ss; - ss << "Unhandled logical type " << LogicalTypeToString(node.logical_type()) - << " for INT32"; - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("Unhandled logical type ", + LogicalTypeToString(node.logical_type()), + " for INT32"); } return Status::OK(); } @@ -154,10 +158,9 @@ static Status FromInt64(const PrimitiveNode& node, std::shared_ptr* o *out = ::arrow::time64(::arrow::TimeUnit::MICRO); break; default: - std::stringstream ss; - ss << "Unhandled logical type " << LogicalTypeToString(node.logical_type()) - << " for INT64"; - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("Unhandled logical type ", + LogicalTypeToString(node.logical_type()), + " for INT64"); } return Status::OK(); } @@ -423,45 +426,66 @@ Status StructToNode(const std::shared_ptr<::arrow::StructType>& type, return Status::OK(); } +static LogicalType::type 
LogicalTypeFromArrowTimeUnit(::arrow::TimeUnit::type time_unit) {
+  switch (time_unit) {
+    case ::arrow::TimeUnit::MILLI:
+      return LogicalType::TIMESTAMP_MILLIS;
+    case ::arrow::TimeUnit::MICRO:
+      return LogicalType::TIMESTAMP_MICROS;
+    case ::arrow::TimeUnit::SECOND:
+    case ::arrow::TimeUnit::NANO:
+      // No equivalent parquet logical type.
+      break;
+  }
+
+  return LogicalType::NONE;
+}
+
 static Status GetTimestampMetadata(const ::arrow::TimestampType& type,
                                    const ArrowWriterProperties& properties,
                                    ParquetType::type* physical_type,
                                    LogicalType::type* logical_type) {
-  auto unit = type.unit();
-  *physical_type = ParquetType::INT64;
+  const bool coerce = properties.coerce_timestamps_enabled();
+  const auto unit = coerce ? properties.coerce_timestamps_unit() : type.unit();
 
-  if (properties.coerce_timestamps_enabled()) {
-    auto coerce_unit = properties.coerce_timestamps_unit();
-    if (coerce_unit == ::arrow::TimeUnit::MILLI) {
-      *logical_type = LogicalType::TIMESTAMP_MILLIS;
-    } else if (coerce_unit == ::arrow::TimeUnit::MICRO) {
-      *logical_type = LogicalType::TIMESTAMP_MICROS;
-    } else {
-      return Status::NotImplemented(
-          "Can only coerce Arrow timestamps to milliseconds"
-          " or microseconds");
+  // The user is explicitly asking for Impala Int96 encoding; in that case
+  // there is no logical type.
+  if (properties.support_deprecated_int96_timestamps()) {
+    *physical_type = ParquetType::INT96;
+    return Status::OK();
+  }
+
+  *physical_type = ParquetType::INT64;
+  *logical_type = LogicalTypeFromArrowTimeUnit(unit);
+
+  // The user is requesting that all timestamp columns be cast to a specific
+  // unit. Only two TimeUnit values are supported by arrow-parquet.
+  if (coerce) {
+    switch (unit) {
+      case ::arrow::TimeUnit::MILLI:
+      case ::arrow::TimeUnit::MICRO:
+        break;
+      case ::arrow::TimeUnit::NANO:
+      case ::arrow::TimeUnit::SECOND:
+        return Status::NotImplemented(
+            "Can only coerce Arrow timestamps to milliseconds"
+            " or microseconds");
     }
+    return Status::OK();
   }
 
-  if (unit == ::arrow::TimeUnit::MILLI) {
-    *logical_type = LogicalType::TIMESTAMP_MILLIS;
-  } else if (unit == ::arrow::TimeUnit::MICRO) {
+  // Until ARROW-3729 is resolved, nanoseconds are explicitly converted to
+  // int64 microseconds when deprecated int96 is not requested.
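+  // In tabular form (an illustration derived from the code above, assuming
+  // default writer properties, i.e. no int96 and no coercion):
+  //
+  //   Arrow unit   physical type   logical type
+  //   SECOND       INT64           error: NotImplemented (see below)
+  //   MILLI        INT64           TIMESTAMP_MILLIS
+  //   MICRO        INT64           TIMESTAMP_MICROS
+  //   NANO         INT64           TIMESTAMP_MICROS (values cast on write)
+  //
+  // With support_deprecated_int96_timestamps() enabled, every unit instead
+  // maps to physical INT96 with no logical type.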
+ if (type.unit() == ::arrow::TimeUnit::NANO) *logical_type = LogicalType::TIMESTAMP_MICROS; - } else if (unit == ::arrow::TimeUnit::NANO) { - if (properties.support_deprecated_int96_timestamps()) { - *physical_type = ParquetType::INT96; - // No corresponding logical type - } else { - *logical_type = LogicalType::TIMESTAMP_MICROS; - } - } else { + else if (type.unit() == ::arrow::TimeUnit::SECOND) return Status::NotImplemented( "Only MILLI, MICRO, and NANOS units supported for Arrow timestamps with " "Parquet."); - } + return Status::OK(); -} +} // namespace arrow Status FieldToNode(const std::shared_ptr& field, const WriterProperties& properties, @@ -592,10 +616,9 @@ Status FieldToNode(const std::shared_ptr& field, } default: { // TODO: DENSE_UNION, SPARE_UNION, JSON_SCALAR, DECIMAL_TEXT, VARCHAR - std::stringstream ss; - ss << "Unhandled type for Arrow to Parquet schema conversion: "; - ss << field->type()->ToString(); - return Status::NotImplemented(ss.str()); + return Status::NotImplemented( + "Unhandled type for Arrow to Parquet schema conversion: ", + field->type()->ToString()); } } PARQUET_CATCH_NOT_OK(*out = @@ -698,7 +721,7 @@ int32_t DecimalSize(int32_t precision) { } DCHECK(false); return -1; -} +} // namespace arrow } // namespace arrow } // namespace parquet diff --git a/cpp/src/parquet/arrow/schema.h b/cpp/src/parquet/arrow/schema.h index 649fe86120a18..0e65ed844eb58 100644 --- a/cpp/src/parquet/arrow/schema.h +++ b/cpp/src/parquet/arrow/schema.h @@ -22,15 +22,14 @@ #include #include -#include "arrow/api.h" - -#include "parquet/arrow/writer.h" #include "parquet/metadata.h" #include "parquet/schema.h" #include "parquet/util/visibility.h" namespace arrow { +class Field; +class Schema; class Status; } // namespace arrow diff --git a/cpp/src/parquet/arrow/test-util.h b/cpp/src/parquet/arrow/test-util.h index d425cb0db7e48..abe4a03364e13 100644 --- a/cpp/src/parquet/arrow/test-util.h +++ b/cpp/src/parquet/arrow/test-util.h @@ -15,8 +15,11 @@ // specific language governing permissions and limitations // under the License. 
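The schema.h hunk above replaces the umbrella arrow/api.h include with forward declarations of the few types the header only names in declarations. A minimal sketch of the pattern (a hypothetical header, not part of this change):

// my_schema_util.h -- hypothetical example of the forward-declaration pattern.
// The declarations below compile without the full class definitions because
// no definition is needed until the function is defined or called; the .cc
// file includes the real headers.
namespace arrow {
class Schema;
class Status;
}  // namespace arrow

::arrow::Status DescribeSchema(const ::arrow::Schema& schema);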
+#pragma once + #include #include +#include #include #include #include @@ -28,14 +31,6 @@ #include "parquet/arrow/record_reader.h" -namespace arrow { -// PARQUET-1382: backwards-compatible shim for arrow::test namespace -namespace test {} - -using namespace ::arrow::test; // NOLINT - -} // namespace arrow - namespace parquet { using internal::RecordReader; @@ -144,9 +139,9 @@ NonNullArray(size_t size, std::shared_ptr* out) { static inline void random_decimals(int64_t n, uint32_t seed, int32_t precision, uint8_t* out) { - std::mt19937 gen(seed); + std::default_random_engine gen(seed); std::uniform_int_distribution d(0, std::numeric_limits::max()); - const int32_t required_bytes = DecimalSize(precision); + const int32_t required_bytes = ::arrow::DecimalSize(precision); constexpr int32_t byte_width = 16; std::fill(out, out + byte_width * n, '\0'); @@ -433,14 +428,13 @@ Status MakeEmptyListsArray(int64_t size, std::shared_ptr* out_array) { return Status::OK(); } -static std::shared_ptr<::arrow::Column> MakeColumn(const std::string& name, - const std::shared_ptr& array, - bool nullable) { +static inline std::shared_ptr<::arrow::Column> MakeColumn( + const std::string& name, const std::shared_ptr& array, bool nullable) { auto field = ::arrow::field(name, array->type(), nullable); return std::make_shared<::arrow::Column>(field, array); } -static std::shared_ptr<::arrow::Column> MakeColumn( +static inline std::shared_ptr<::arrow::Column> MakeColumn( const std::string& name, const std::vector>& arrays, bool nullable) { auto field = ::arrow::field(name, arrays[0]->type(), nullable); @@ -484,44 +478,6 @@ void ExpectArrayT<::arrow::BooleanType>(void* expected, Array* result) { EXPECT_TRUE(result->Equals(*expected_array)); } -template -void PrintBufferedLevels(const RecordReader& reader) { - using T = typename ::parquet::type_traits::value_type; - - const int16_t* def_levels = reader.def_levels(); - const int16_t* rep_levels = reader.rep_levels(); - const int64_t total_levels_read = reader.levels_position(); - - const T* values = reinterpret_cast(reader.values()); - - std::cout << "def levels: "; - for (int64_t i = 0; i < total_levels_read; ++i) { - std::cout << def_levels[i] << " "; - } - std::cout << std::endl; - - std::cout << "rep levels: "; - for (int64_t i = 0; i < total_levels_read; ++i) { - std::cout << rep_levels[i] << " "; - } - std::cout << std::endl; - - std::cout << "values: "; - for (int64_t i = 0; i < reader.values_written(); ++i) { - std::cout << values[i] << " "; - } - std::cout << std::endl; -} - -template <> -void PrintBufferedLevels(const RecordReader& reader) {} - -template <> -void PrintBufferedLevels(const RecordReader& reader) {} - -template <> -void PrintBufferedLevels(const RecordReader& reader) {} - } // namespace arrow } // namespace parquet diff --git a/cpp/src/parquet/arrow/writer.cc b/cpp/src/parquet/arrow/writer.cc index f5e234d30211e..6813880f3b0e3 100644 --- a/cpp/src/parquet/arrow/writer.cc +++ b/cpp/src/parquet/arrow/writer.cc @@ -18,17 +18,29 @@ #include "parquet/arrow/writer.h" #include -#include +#include +#include #include #include -#include "arrow/api.h" +#include "arrow/array.h" +#include "arrow/buffer.h" +#include "arrow/builder.h" #include "arrow/compute/api.h" +#include "arrow/status.h" +#include "arrow/table.h" #include "arrow/util/bit-util.h" +#include "arrow/util/checked_cast.h" #include "arrow/visitor_inline.h" #include "arrow/util/logging.h" + #include "parquet/arrow/schema.h" +#include "parquet/column_writer.h" +#include "parquet/exception.h" +#include 
"parquet/file_writer.h" +#include "parquet/schema.h" +#include "parquet/util/memory.h" using arrow::Array; using arrow::BinaryArray; @@ -312,6 +324,10 @@ class ArrowColumnWriter { Status Write(const Array& data); Status Write(const ChunkedArray& data, int64_t offset, const int64_t size) { + if (data.length() == 0) { + return Status::OK(); + } + int64_t absolute_position = 0; int chunk_index = 0; int64_t chunk_offset = 0; @@ -386,7 +402,11 @@ class ArrowColumnWriter { Status WriteBatch(int64_t num_levels, const int16_t* def_levels, const int16_t* rep_levels, const typename ParquetType::c_type* values) { - auto typed_writer = static_cast*>(writer_); + auto typed_writer = + ::arrow::internal::checked_cast*>(writer_); + // WriteBatch was called with type mismatching the writer_'s type. This + // could be a schema conversion problem. + DCHECK(typed_writer); PARQUET_CATCH_NOT_OK( typed_writer->WriteBatch(num_levels, def_levels, rep_levels, values)); return Status::OK(); @@ -397,7 +417,11 @@ class ArrowColumnWriter { const int16_t* rep_levels, const uint8_t* valid_bits, int64_t valid_bits_offset, const typename ParquetType::c_type* values) { - auto typed_writer = static_cast*>(writer_); + auto typed_writer = + ::arrow::internal::checked_cast*>(writer_); + // WriteBatchSpaced was called with type mismatching the writer_'s type. This + // could be a schema conversion problem. + DCHECK(typed_writer); PARQUET_CATCH_NOT_OK(typed_writer->WriteBatchSpaced( num_levels, def_levels, rep_levels, valid_bits, valid_bits_offset, values)); return Status::OK(); @@ -504,7 +528,7 @@ Status ArrowColumnWriter::WriteNullableBatch( using ParquetCType = typename ParquetType::c_type; ParquetCType* buffer; - RETURN_NOT_OK(ctx_->GetScratchData(num_levels, &buffer)); + RETURN_NOT_OK(ctx_->GetScratchData(num_values, &buffer)); for (int i = 0; i < num_values; i++) { buffer[i] = static_cast(values[i]); } @@ -570,20 +594,42 @@ NULLABLE_BATCH_FAST_PATH(DoubleType, ::arrow::DoubleType, double) NULLABLE_BATCH_FAST_PATH(Int64Type, ::arrow::TimestampType, int64_t) NONNULLABLE_BATCH_FAST_PATH(Int64Type, ::arrow::TimestampType, int64_t) +#define CONV_CASE_LOOP(ConversionFunction) \ + for (int64_t i = 0; i < num_values; i++) \ + ConversionFunction(arrow_values[i], &output[i]); + +static void ConvertArrowTimestampToParquetInt96(const int64_t* arrow_values, + int64_t num_values, + ::arrow::TimeUnit ::type unit_type, + Int96* output) { + switch (unit_type) { + case TimeUnit::NANO: + CONV_CASE_LOOP(internal::NanosecondsToImpalaTimestamp); + break; + case TimeUnit::MICRO: + CONV_CASE_LOOP(internal::MicrosecondsToImpalaTimestamp); + break; + case TimeUnit::MILLI: + CONV_CASE_LOOP(internal::MillisecondsToImpalaTimestamp); + break; + case TimeUnit::SECOND: + CONV_CASE_LOOP(internal::SecondsToImpalaTimestamp); + break; + } +} + +#undef CONV_CASE_LOOP + template <> Status ArrowColumnWriter::WriteNullableBatch( const ::arrow::TimestampType& type, int64_t num_values, int64_t num_levels, const int16_t* def_levels, const int16_t* rep_levels, const uint8_t* valid_bits, int64_t valid_bits_offset, const int64_t* values) { - Int96* buffer; + Int96* buffer = nullptr; RETURN_NOT_OK(ctx_->GetScratchData(num_values, &buffer)); - if (type.unit() == TimeUnit::NANO) { - for (int i = 0; i < num_values; i++) { - internal::NanosecondsToImpalaTimestamp(values[i], &buffer[i]); - } - } else { - return Status::NotImplemented("Only NANO timestamps are supported for Int96 writing"); - } + + ConvertArrowTimestampToParquetInt96(values, num_values, type.unit(), buffer); 
+  return WriteBatchSpaced(num_levels, def_levels, rep_levels, valid_bits,
+                          valid_bits_offset, buffer);
 }
 
@@ -592,15 +638,11 @@ template <>
 Status ArrowColumnWriter::WriteNonNullableBatch<Int96Type, ::arrow::TimestampType>(
     const ::arrow::TimestampType& type, int64_t num_values, int64_t num_levels,
     const int16_t* def_levels, const int16_t* rep_levels, const int64_t* values) {
-  Int96* buffer;
+  Int96* buffer = nullptr;
   RETURN_NOT_OK(ctx_->GetScratchData<Int96>(num_values, &buffer));
-  if (type.unit() == TimeUnit::NANO) {
-    for (int i = 0; i < num_values; i++) {
-      internal::NanosecondsToImpalaTimestamp(values[i], buffer + i);
-    }
-  } else {
-    return Status::NotImplemented("Only NANO timestamps are supported for Int96 writing");
-  }
+
+  ConvertArrowTimestampToParquetInt96(values, num_values, type.unit(), buffer);
+
   return WriteBatch(num_levels, def_levels, rep_levels, buffer);
 }
 
@@ -611,21 +653,15 @@ Status ArrowColumnWriter::WriteTimestamps(const Array& values, int64_t num_level
   const bool is_nanosecond = type.unit() == TimeUnit::NANO;
 
-  // In the case where support_deprecated_int96_timestamps was specified
-  // and coerce_timestamps_enabled was specified, a nanosecond column
-  // will have a physical type of int64. In that case, we fall through
-  // to the else if below.
-  //
-  // See https://issues.apache.org/jira/browse/ARROW-2082
-  if (is_nanosecond && ctx_->properties->support_deprecated_int96_timestamps() &&
-      !ctx_->properties->coerce_timestamps_enabled()) {
+  if (ctx_->properties->support_deprecated_int96_timestamps()) {
+    // The user explicitly requested Int96 storage.
     return TypedWriteBatch<Int96Type, ::arrow::TimestampType>(values, num_levels,
                                                               def_levels, rep_levels);
   } else if (is_nanosecond ||
              (ctx_->properties->coerce_timestamps_enabled() &&
               (type.unit() != ctx_->properties->coerce_timestamps_unit()))) {
     // Casting is required.
This covers several cases - // * Nanoseconds -> cast to microseconds + // * Nanoseconds -> cast to microseconds (until ARROW-3729 is resolved) // * coerce_timestamps_enabled_, cast all timestamps to requested unit return WriteTimestampsCoerce(ctx_->properties->truncated_timestamps_allowed(), values, num_levels, def_levels, rep_levels); @@ -656,10 +692,8 @@ Status ArrowColumnWriter::WriteTimestampsCoerce(const bool truncated_timestamps_ auto DivideBy = [&](const int64_t factor) { for (int64_t i = 0; i < array.length(); i++) { if (!truncated_timestamps_allowed && !data.IsNull(i) && (values[i] % factor != 0)) { - std::stringstream ss; - ss << "Casting from " << type.ToString() << " to " << target_type->ToString() - << " would lose data: " << values[i]; - return Status::Invalid(ss.str()); + return Status::Invalid("Casting from ", type.ToString(), " to ", + target_type->ToString(), " would lose data: ", values[i]); } buffer[i] = values[i] / factor; } @@ -861,6 +895,11 @@ Status ArrowColumnWriter::TypedWriteBatch( } Status ArrowColumnWriter::Write(const Array& data) { + if (data.length() == 0) { + // Write nothing when length is 0 + return Status::OK(); + } + ::arrow::Type::type values_type; RETURN_NOT_OK(GetLeafType(*data.type(), &values_type)); @@ -925,9 +964,8 @@ Status ArrowColumnWriter::Write(const Array& data) { default: break; } - std::stringstream ss; - ss << "Data type not supported as list value: " << values_array->type()->ToString(); - return Status::NotImplemented(ss.str()); + return Status::NotImplemented("Data type not supported as list value: ", + values_array->type()->ToString()); } } // namespace @@ -1112,22 +1150,32 @@ Status WriteFileMetaData(const FileMetaData& file_metadata, namespace {} // namespace Status FileWriter::WriteTable(const Table& table, int64_t chunk_size) { - if (chunk_size <= 0) { + if (chunk_size <= 0 && table.num_rows() > 0) { return Status::Invalid("chunk size per row_group must be greater than 0"); } else if (chunk_size > impl_->properties().max_row_group_length()) { chunk_size = impl_->properties().max_row_group_length(); } - for (int chunk = 0; chunk * chunk_size < table.num_rows(); chunk++) { - int64_t offset = chunk * chunk_size; - int64_t size = std::min(chunk_size, table.num_rows() - offset); - - RETURN_NOT_OK_ELSE(NewRowGroup(size), PARQUET_IGNORE_NOT_OK(Close())); + auto WriteRowGroup = [&](int64_t offset, int64_t size) { + RETURN_NOT_OK(NewRowGroup(size)); for (int i = 0; i < table.num_columns(); i++) { auto chunked_data = table.column(i)->data(); - RETURN_NOT_OK_ELSE(WriteColumnChunk(chunked_data, offset, size), - PARQUET_IGNORE_NOT_OK(Close())); + RETURN_NOT_OK(WriteColumnChunk(chunked_data, offset, size)); } + return Status::OK(); + }; + + if (table.num_rows() == 0) { + // Append a row group with 0 rows + RETURN_NOT_OK_ELSE(WriteRowGroup(0, 0), PARQUET_IGNORE_NOT_OK(Close())); + return Status::OK(); + } + + for (int chunk = 0; chunk * chunk_size < table.num_rows(); chunk++) { + int64_t offset = chunk * chunk_size; + RETURN_NOT_OK_ELSE( + WriteRowGroup(offset, std::min(chunk_size, table.num_rows() - offset)), + PARQUET_IGNORE_NOT_OK(Close())); } return Status::OK(); } diff --git a/cpp/src/parquet/arrow/writer.h b/cpp/src/parquet/arrow/writer.h index 2538c028002e4..ab3d7e815cc9a 100644 --- a/cpp/src/parquet/arrow/writer.h +++ b/cpp/src/parquet/arrow/writer.h @@ -18,26 +18,37 @@ #ifndef PARQUET_ARROW_WRITER_H #define PARQUET_ARROW_WRITER_H +#include #include -#include "parquet/api/schema.h" -#include "parquet/api/writer.h" +#include 
"parquet/properties.h" +#include "parquet/types.h" +#include "parquet/util/visibility.h" -#include "arrow/io/interfaces.h" #include "arrow/type.h" namespace arrow { class Array; +class ChunkedArray; class MemoryPool; -class PrimitiveArray; -class Schema; class Status; -class StringArray; class Table; + +namespace io { + +class OutputStream; + +} // namespace io + } // namespace arrow namespace parquet { + +class FileMetaData; +class OutputStream; +class ParquetFileWriter; + namespace arrow { class PARQUET_EXPORT ArrowWriterProperties { @@ -45,19 +56,19 @@ class PARQUET_EXPORT ArrowWriterProperties { class Builder { public: Builder() - : write_nanos_as_int96_(false), + : write_timestamps_as_int96_(false), coerce_timestamps_enabled_(false), coerce_timestamps_unit_(::arrow::TimeUnit::SECOND), truncated_timestamps_allowed_(false) {} virtual ~Builder() {} Builder* disable_deprecated_int96_timestamps() { - write_nanos_as_int96_ = false; + write_timestamps_as_int96_ = false; return this; } Builder* enable_deprecated_int96_timestamps() { - write_nanos_as_int96_ = true; + write_timestamps_as_int96_ = true; return this; } @@ -79,19 +90,19 @@ class PARQUET_EXPORT ArrowWriterProperties { std::shared_ptr build() { return std::shared_ptr(new ArrowWriterProperties( - write_nanos_as_int96_, coerce_timestamps_enabled_, coerce_timestamps_unit_, + write_timestamps_as_int96_, coerce_timestamps_enabled_, coerce_timestamps_unit_, truncated_timestamps_allowed_)); } private: - bool write_nanos_as_int96_; + bool write_timestamps_as_int96_; bool coerce_timestamps_enabled_; ::arrow::TimeUnit::type coerce_timestamps_unit_; bool truncated_timestamps_allowed_; }; - bool support_deprecated_int96_timestamps() const { return write_nanos_as_int96_; } + bool support_deprecated_int96_timestamps() const { return write_timestamps_as_int96_; } bool coerce_timestamps_enabled() const { return coerce_timestamps_enabled_; } ::arrow::TimeUnit::type coerce_timestamps_unit() const { @@ -105,12 +116,12 @@ class PARQUET_EXPORT ArrowWriterProperties { bool coerce_timestamps_enabled, ::arrow::TimeUnit::type coerce_timestamps_unit, bool truncated_timestamps_allowed) - : write_nanos_as_int96_(write_nanos_as_int96), + : write_timestamps_as_int96_(write_nanos_as_int96), coerce_timestamps_enabled_(coerce_timestamps_enabled), coerce_timestamps_unit_(coerce_timestamps_unit), truncated_timestamps_allowed_(truncated_timestamps_allowed) {} - const bool write_nanos_as_int96_; + const bool write_timestamps_as_int96_; const bool coerce_timestamps_enabled_; const ::arrow::TimeUnit::type coerce_timestamps_unit_; const bool truncated_timestamps_allowed_; @@ -208,24 +219,52 @@ namespace internal { * Timestamp conversion constants */ constexpr int64_t kJulianEpochOffsetDays = INT64_C(2440588); -constexpr int64_t kNanosecondsPerDay = INT64_C(86400000000000); -/** - * Converts nanosecond timestamps to Impala (Int96) format - */ -inline void NanosecondsToImpalaTimestamp(const int64_t nanoseconds, - Int96* impala_timestamp) { - int64_t julian_days = (nanoseconds / kNanosecondsPerDay) + kJulianEpochOffsetDays; +template +inline void ArrowTimestampToImpalaTimestamp(const int64_t time, Int96* impala_timestamp) { + int64_t julian_days = (time / UnitPerDay) + kJulianEpochOffsetDays; (*impala_timestamp).value[2] = (uint32_t)julian_days; - int64_t last_day_nanos = nanoseconds % kNanosecondsPerDay; + int64_t last_day_units = time % UnitPerDay; int64_t* impala_last_day_nanos = reinterpret_cast(impala_timestamp); - *impala_last_day_nanos = last_day_nanos; + 
*impala_last_day_nanos = last_day_units * NanosecondsPerUnit; +} + +constexpr int64_t kSecondsInNanos = INT64_C(1000000000); + +inline void SecondsToImpalaTimestamp(const int64_t seconds, Int96* impala_timestamp) { + ArrowTimestampToImpalaTimestamp(seconds, + impala_timestamp); +} + +constexpr int64_t kMillisecondsInNanos = kSecondsInNanos / INT64_C(1000); + +inline void MillisecondsToImpalaTimestamp(const int64_t milliseconds, + Int96* impala_timestamp) { + ArrowTimestampToImpalaTimestamp( + milliseconds, impala_timestamp); +} + +constexpr int64_t kMicrosecondsInNanos = kMillisecondsInNanos / INT64_C(1000); + +inline void MicrosecondsToImpalaTimestamp(const int64_t microseconds, + Int96* impala_timestamp) { + ArrowTimestampToImpalaTimestamp( + microseconds, impala_timestamp); +} + +constexpr int64_t kNanosecondsInNanos = INT64_C(1); + +inline void NanosecondsToImpalaTimestamp(const int64_t nanoseconds, + Int96* impala_timestamp) { + ArrowTimestampToImpalaTimestamp( + nanoseconds, impala_timestamp); } } // namespace internal } // namespace arrow + } // namespace parquet #endif // PARQUET_ARROW_WRITER_H diff --git a/cpp/src/parquet/bloom_filter-test.cc b/cpp/src/parquet/bloom_filter-test.cc index 945f80b7b96f0..e2b0b699b203f 100644 --- a/cpp/src/parquet/bloom_filter-test.cc +++ b/cpp/src/parquet/bloom_filter-test.cc @@ -93,17 +93,13 @@ std::string GetRandomString(uint32_t length) { const std::string charset = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; - // The uuid_seed was generated by "uuidgen -r" - const std::string uuid_seed = "8de406aa-fb59-4195-a81c-5152af26433f"; - std::seed_seq seed(uuid_seed.begin(), uuid_seed.end()); - std::mt19937 generator(seed); + std::default_random_engine gen(42); std::uniform_int_distribution dist(0, static_cast(charset.size() - 1)); - std::string ret = ""; + std::string ret(length, 'x'); for (uint32_t i = 0; i < length; i++) { - ret += charset[dist(generator)]; + ret[i] = charset[dist(gen)]; } - return ret; } @@ -146,7 +142,7 @@ TEST(FPPTest, TestBloomFilter) { } // The exist should be probably less than 1000 according default FPP 0.01. - EXPECT_TRUE(exist < total_count * fpp); + EXPECT_LT(exist, total_count * fpp); } // The CompatibilityTest is used to test cross compatibility with parquet-mr, it reads diff --git a/cpp/src/parquet/bloom_filter.cc b/cpp/src/parquet/bloom_filter.cc index 31a33fa782a7b..8f5f695fde71f 100644 --- a/cpp/src/parquet/bloom_filter.cc +++ b/cpp/src/parquet/bloom_filter.cc @@ -15,17 +15,16 @@ // specific language governing permissions and limitations // under the License. 
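For intuition about the Int96 layout targeted by the writer.h helpers above: the upper four bytes (value[2]) hold the Julian day and the lower eight bytes hold nanoseconds within that day. A standalone sanity check of the arithmetic (a sketch; kSecondsPerDay is a local stand-in for the UnitPerDay template argument, which is not shown in the hunk):

#include <cstdint>
#include <iostream>

int main() {
  const int64_t kJulianEpochOffsetDays = 2440588;  // 1970-01-01 as a Julian day
  const int64_t kSecondsPerDay = 86400;
  const int64_t kSecondsInNanos = 1000000000;

  int64_t seconds = 90061;  // one day plus 01:01:01 past the Unix epoch
  int64_t julian_days = seconds / kSecondsPerDay + kJulianEpochOffsetDays;
  int64_t last_day_nanos = (seconds % kSecondsPerDay) * kSecondsInNanos;

  // Prints 2440589 and 3661000000000: the day after the epoch, 01:01:01 in ns.
  std::cout << julian_days << " " << last_day_nanos << std::endl;
  return 0;
}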
-#include -#include #include +#include -#include "arrow/status.h" +#include "arrow/buffer.h" +#include "arrow/memory_pool.h" #include "arrow/util/bit-util.h" #include "arrow/util/logging.h" #include "parquet/bloom_filter.h" #include "parquet/exception.h" #include "parquet/murmur3.h" -#include "parquet/types.h" namespace parquet { constexpr uint32_t BlockSplitBloomFilter::SALT[kBitsSetPerBlock]; diff --git a/cpp/src/parquet/bloom_filter.h b/cpp/src/parquet/bloom_filter.h index 918780e04971a..a586dc2dcced6 100644 --- a/cpp/src/parquet/bloom_filter.h +++ b/cpp/src/parquet/bloom_filter.h @@ -18,18 +18,24 @@ #ifndef PARQUET_BLOOM_FILTER_H #define PARQUET_BLOOM_FILTER_H +#include #include #include +#include "arrow/util/bit-util.h" #include "arrow/util/logging.h" -#include "parquet/exception.h" #include "parquet/hasher.h" #include "parquet/types.h" #include "parquet/util/memory.h" #include "parquet/util/visibility.h" +namespace arrow { + +class MemoryPool; + +} // namespace arrow + namespace parquet { -class OutputStream; // A Bloom filter is a compact structure to indicate whether an item is not in a set or // probably in a set. The Bloom filter usually consists of a bit set that represents a @@ -98,7 +104,8 @@ class PARQUET_EXPORT BloomFilter { /// Compute hash for fixed byte array value by using its plain encoding result. /// - /// @param value the value to hash. + /// @param value the value address. + /// @param len the value length. /// @return hash result. virtual uint64_t Hash(const FLBA* value, uint32_t len) const = 0; @@ -154,11 +161,13 @@ class PARQUET_EXPORT BlockSplitBloomFilter : public BloomFilter { static uint32_t OptimalNumOfBits(uint32_t ndv, double fpp) { DCHECK(fpp > 0.0 && fpp < 1.0); const double m = -8.0 * ndv / log(1 - pow(fpp, 1.0 / 8)); - uint32_t num_bits = static_cast(m); + uint32_t num_bits; // Handle overflow. if (m < 0 || m > kMaximumBloomFilterBytes << 3) { num_bits = static_cast(kMaximumBloomFilterBytes << 3); + } else { + num_bits = static_cast(m); } // Round up to lower bound @@ -183,6 +192,7 @@ class PARQUET_EXPORT BlockSplitBloomFilter : public BloomFilter { void InsertHash(uint64_t hash) override; void WriteTo(OutputStream* sink) const override; uint32_t GetBitsetSize() const override { return num_bytes_; } + uint64_t Hash(int64_t value) const override { return hasher_->Hash(value); } uint64_t Hash(float value) const override { return hasher_->Hash(value); } uint64_t Hash(double value) const override { return hasher_->Hash(value); } @@ -192,6 +202,7 @@ class PARQUET_EXPORT BlockSplitBloomFilter : public BloomFilter { uint64_t Hash(const FLBA* value, uint32_t len) const override { return hasher_->Hash(value, len); } + /// Deserialize the Bloom filter from an input stream. It is used when reconstructing /// a Bloom filter from a parquet filter. 
/// diff --git a/cpp/src/parquet/column-io-benchmark.cc b/cpp/src/parquet/column-io-benchmark.cc index 8f286f4910000..c648d562649d1 100644 --- a/cpp/src/parquet/column-io-benchmark.cc +++ b/cpp/src/parquet/column-io-benchmark.cc @@ -20,6 +20,7 @@ #include "parquet/column_reader.h" #include "parquet/column_writer.h" #include "parquet/file_reader.h" +#include "parquet/metadata.h" #include "parquet/thrift.h" #include "parquet/util/memory.h" @@ -35,8 +36,8 @@ std::unique_ptr BuildWriter(int64_t output_size, OutputStream* dst, const WriterProperties* properties) { std::unique_ptr pager = PageWriter::Open(dst, Compression::UNCOMPRESSED, metadata); - return std::unique_ptr( - new Int64Writer(metadata, std::move(pager), Encoding::PLAIN, properties)); + return std::unique_ptr(new Int64Writer( + metadata, std::move(pager), false /*use_dictionary*/, Encoding::PLAIN, properties)); } std::shared_ptr Int64Schema(Repetition::type repetition) { @@ -107,12 +108,13 @@ BENCHMARK_TEMPLATE(BM_WriteInt64Column, Repetition::OPTIONAL, Compression::ZSTD) BENCHMARK_TEMPLATE(BM_WriteInt64Column, Repetition::REPEATED, Compression::ZSTD) ->Range(1024, 65536); -std::unique_ptr BuildReader(std::shared_ptr& buffer, +std::shared_ptr BuildReader(std::shared_ptr& buffer, int64_t num_values, ColumnDescriptor* schema) { std::unique_ptr source(new InMemoryInputStream(buffer)); std::unique_ptr page_reader = PageReader::Open(std::move(source), num_values, Compression::UNCOMPRESSED); - return std::unique_ptr(new Int64Reader(schema, std::move(page_reader))); + return std::static_pointer_cast( + ColumnReader::Make(schema, std::move(page_reader))); } template definition_levels_out(state.range(1)); std::vector repetition_levels_out(state.range(1)); while (state.KeepRunning()) { - std::unique_ptr reader = BuildReader(src, state.range(1), schema.get()); + std::shared_ptr reader = BuildReader(src, state.range(1), schema.get()); int64_t values_read = 0; for (size_t i = 0; i < values.size(); i += values_read) { reader->ReadBatch(values_out.size(), definition_levels_out.data(), diff --git a/cpp/src/parquet/column_reader-test.cc b/cpp/src/parquet/column_reader-test.cc index 273b3029ba3d1..0475ca591de02 100644 --- a/cpp/src/parquet/column_reader-test.cc +++ b/cpp/src/parquet/column_reader-test.cc @@ -102,7 +102,7 @@ class TestPrimitiveReader : public ::testing::Test { &vresult[0] + total_values_read, &values_read)); total_values_read += static_cast(values_read); batch_actual += batch; - batch_size = std::max(batch_size * 2, 4096); + batch_size = std::min(1 << 24, std::max(batch_size * 2, 4096)); } while (batch > 0); ASSERT_EQ(num_levels_, batch_actual); @@ -147,7 +147,7 @@ class TestPrimitiveReader : public ::testing::Test { total_values_read += batch - static_cast(null_count); batch_actual += batch; levels_actual += static_cast(levels_read); - batch_size = std::max(batch_size * 2, 4096); + batch_size = std::min(1 << 24, std::max(batch_size * 2, 4096)); } while ((batch > 0) || (levels_read > 0)); ASSERT_EQ(num_levels_, levels_actual); @@ -386,5 +386,34 @@ TEST_F(TestPrimitiveReader, TestDictionaryEncodedPages) { pages_.clear(); } +TEST(TestColumnReader, DefinitionLevelsToBitmap) { + // Bugs in this function were exposed in ARROW-3930 + std::vector def_levels = {3, 3, 3, 2, 3, 3, 3, 3, 3}; + std::vector rep_levels = {0, 1, 1, 1, 1, 1, 1, 1, 1}; + + std::vector valid_bits(2, 0); + + const int max_def_level = 3; + const int max_rep_level = 1; + + int64_t values_read = -1; + int64_t null_count = 0; + 
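+  // Shape of the inputs: def_levels[3] == 2 == max_def_level - 1 with
+  // max_rep_level > 0, i.e. exactly one null entry inside a repeated value,
+  // so all 9 positions should land in the bitmap with a single null.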
internal::DefinitionLevelsToBitmap(def_levels.data(), 9, max_def_level, max_rep_level, + &values_read, &null_count, valid_bits.data(), + 0 /* valid_bits_offset */); + ASSERT_EQ(9, values_read); + ASSERT_EQ(1, null_count); + + // Call again with 0 definition levels, make sure that valid_bits is unmodifed + const uint8_t current_byte = valid_bits[1]; + null_count = 0; + internal::DefinitionLevelsToBitmap(def_levels.data(), 0, max_def_level, max_rep_level, + &values_read, &null_count, valid_bits.data(), + 9 /* valid_bits_offset */); + ASSERT_EQ(0, values_read); + ASSERT_EQ(0, null_count); + ASSERT_EQ(current_byte, valid_bits[1]); +} + } // namespace test } // namespace parquet diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index 7fbf9babd71fa..33d2f5cefb5f0 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -17,20 +17,22 @@ #include "parquet/column_reader.h" -#include #include +#include +#include #include -#include -#include -#include -#include -#include -#include +#include "arrow/buffer.h" +#include "arrow/util/bit-stream-utils.h" +#include "arrow/util/bit-util.h" +#include "arrow/util/compression.h" +#include "arrow/util/logging.h" +#include "arrow/util/rle-encoding.h" #include "parquet/column_page.h" -#include "parquet/encoding-internal.h" +#include "parquet/encoding.h" #include "parquet/properties.h" +#include "parquet/statistics.h" #include "parquet/thrift.h" using arrow::MemoryPool; @@ -264,19 +266,300 @@ std::unique_ptr PageReader::Open(std::unique_ptr stream } // ---------------------------------------------------------------------- +// TypedColumnReader implementations -ColumnReader::ColumnReader(const ColumnDescriptor* descr, - std::unique_ptr pager, MemoryPool* pool) - : descr_(descr), - pager_(std::move(pager)), - num_buffered_values_(0), - num_decoded_values_(0), - pool_(pool) {} +template +class TypedColumnReaderImpl : public TypedColumnReader { + public: + using T = typename DType::c_type; + + TypedColumnReaderImpl(const ColumnDescriptor* descr, std::unique_ptr pager, + ::arrow::MemoryPool* pool) + : descr_(descr), + pager_(std::move(pager)), + num_buffered_values_(0), + num_decoded_values_(0), + pool_(pool), + current_decoder_(NULLPTR) {} + + int64_t ReadBatch(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, + T* values, int64_t* values_read) override; + + int64_t ReadBatchSpaced(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, + T* values, uint8_t* valid_bits, int64_t valid_bits_offset, + int64_t* levels_read, int64_t* values_read, + int64_t* null_count) override; + + int64_t Skip(int64_t num_rows_to_skip) override; + + bool HasNext() override { + // Either there is no data page available yet, or the data page has been + // exhausted + if (num_buffered_values_ == 0 || num_decoded_values_ == num_buffered_values_) { + if (!ReadNewPage() || num_buffered_values_ == 0) { + return false; + } + } + return true; + } + + Type::type type() const override { return descr_->physical_type(); } + + const ColumnDescriptor* descr() const override { return descr_; } + + protected: + using DecoderType = TypedDecoder; + + // Advance to the next data page + bool ReadNewPage(); + + // Read multiple definition levels into preallocated memory + // + // Returns the number of decoded definition levels + int64_t ReadDefinitionLevels(int64_t batch_size, int16_t* levels) { + if (descr_->max_definition_level() == 0) { + return 0; + } + return definition_level_decoder_.Decode(static_cast(batch_size), levels); + 
} + + // Read multiple repetition levels into preallocated memory + // Returns the number of decoded repetition levels + int64_t ReadRepetitionLevels(int64_t batch_size, int16_t* levels) { + if (descr_->max_repetition_level() == 0) { + return 0; + } + return repetition_level_decoder_.Decode(static_cast(batch_size), levels); + } + + int64_t available_values_current_page() const { + return num_buffered_values_ - num_decoded_values_; + } + + void ConsumeBufferedValues(int64_t num_values) { num_decoded_values_ += num_values; } + + const ColumnDescriptor* descr_; + + std::unique_ptr pager_; + std::shared_ptr current_page_; + + // Not set if full schema for this field has no optional or repeated elements + LevelDecoder definition_level_decoder_; + + // Not set for flat schemas. + LevelDecoder repetition_level_decoder_; + + // The total number of values stored in the data page. This is the maximum of + // the number of encoded definition levels or encoded values. For + // non-repeated, required columns, this is equal to the number of encoded + // values. For repeated or optional values, there may be fewer data values + // than levels, and this tells you how many encoded levels there are in that + // case. + int64_t num_buffered_values_; + + // The number of values from the current data page that have been decoded + // into memory + int64_t num_decoded_values_; + + ::arrow::MemoryPool* pool_; + + // Read up to batch_size values from the current data page into the + // pre-allocated memory T* + // + // @returns: the number of values read into the out buffer + int64_t ReadValues(int64_t batch_size, T* out); + + // Read up to batch_size values from the current data page into the + // pre-allocated memory T*, leaving spaces for null entries according + // to the def_levels. + // + // @returns: the number of values read into the out buffer + int64_t ReadValuesSpaced(int64_t batch_size, T* out, int64_t null_count, + uint8_t* valid_bits, int64_t valid_bits_offset); + + // Map of encoding type to the respective decoder object. For example, a + // column chunk's data pages may include both dictionary-encoded and + // plain-encoded data. 
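+  // (A concrete case: a writer that overflows its dictionary page size limit
+  // falls back to plain encoding mid-chunk, so one column chunk can contain
+  // both kinds of data page and both decoders must stay cached here.)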
+ std::unordered_map> decoders_; + + void ConfigureDictionary(const DictionaryPage* page); + DecoderType* current_decoder_; +}; + +template +int64_t TypedColumnReaderImpl::ReadValues(int64_t batch_size, T* out) { + int64_t num_decoded = current_decoder_->Decode(out, static_cast(batch_size)); + return num_decoded; +} + +template +int64_t TypedColumnReaderImpl::ReadValuesSpaced(int64_t batch_size, T* out, + int64_t null_count, + uint8_t* valid_bits, + int64_t valid_bits_offset) { + return current_decoder_->DecodeSpaced(out, static_cast(batch_size), + static_cast(null_count), valid_bits, + valid_bits_offset); +} + +template +int64_t TypedColumnReaderImpl::ReadBatch(int64_t batch_size, int16_t* def_levels, + int16_t* rep_levels, T* values, + int64_t* values_read) { + // HasNext invokes ReadNewPage + if (!HasNext()) { + *values_read = 0; + return 0; + } + + // TODO(wesm): keep reading data pages until batch_size is reached, or the + // row group is finished + batch_size = std::min(batch_size, num_buffered_values_ - num_decoded_values_); + + int64_t num_def_levels = 0; + int64_t num_rep_levels = 0; + + int64_t values_to_read = 0; + + // If the field is required and non-repeated, there are no definition levels + if (descr_->max_definition_level() > 0 && def_levels) { + num_def_levels = ReadDefinitionLevels(batch_size, def_levels); + // TODO(wesm): this tallying of values-to-decode can be performed with better + // cache-efficiency if fused with the level decoding. + for (int64_t i = 0; i < num_def_levels; ++i) { + if (def_levels[i] == descr_->max_definition_level()) { + ++values_to_read; + } + } + } else { + // Required field, read all values + values_to_read = batch_size; + } + + // Not present for non-repeated fields + if (descr_->max_repetition_level() > 0 && rep_levels) { + num_rep_levels = ReadRepetitionLevels(batch_size, rep_levels); + if (def_levels && num_def_levels != num_rep_levels) { + throw ParquetException("Number of decoded rep / def levels did not match"); + } + } + + *values_read = ReadValues(values_to_read, values); + int64_t total_values = std::max(num_def_levels, *values_read); + ConsumeBufferedValues(total_values); + + return total_values; +} + +template +int64_t TypedColumnReaderImpl::ReadBatchSpaced( + int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, T* values, + uint8_t* valid_bits, int64_t valid_bits_offset, int64_t* levels_read, + int64_t* values_read, int64_t* null_count_out) { + // HasNext invokes ReadNewPage + if (!HasNext()) { + *levels_read = 0; + *values_read = 0; + *null_count_out = 0; + return 0; + } + + int64_t total_values; + // TODO(wesm): keep reading data pages until batch_size is reached, or the + // row group is finished + batch_size = std::min(batch_size, num_buffered_values_ - num_decoded_values_); + + // If the field is required and non-repeated, there are no definition levels + if (descr_->max_definition_level() > 0) { + int64_t num_def_levels = ReadDefinitionLevels(batch_size, def_levels); + + // Not present for non-repeated fields + if (descr_->max_repetition_level() > 0) { + int64_t num_rep_levels = ReadRepetitionLevels(batch_size, rep_levels); + if (num_def_levels != num_rep_levels) { + throw ParquetException("Number of decoded rep / def levels did not match"); + } + } + + const bool has_spaced_values = internal::HasSpacedValues(descr_); + + int64_t null_count = 0; + if (!has_spaced_values) { + int values_to_read = 0; + for (int64_t i = 0; i < num_def_levels; ++i) { + if (def_levels[i] == descr_->max_definition_level()) { + 
++values_to_read; + } + } + total_values = ReadValues(values_to_read, values); + for (int64_t i = 0; i < total_values; i++) { + ::arrow::BitUtil::SetBit(valid_bits, valid_bits_offset + i); + } + *values_read = total_values; + } else { + int16_t max_definition_level = descr_->max_definition_level(); + int16_t max_repetition_level = descr_->max_repetition_level(); + internal::DefinitionLevelsToBitmap(def_levels, num_def_levels, max_definition_level, + max_repetition_level, values_read, &null_count, + valid_bits, valid_bits_offset); + total_values = ReadValuesSpaced(*values_read, values, static_cast(null_count), + valid_bits, valid_bits_offset); + } + *levels_read = num_def_levels; + *null_count_out = null_count; + + } else { + // Required field, read all values + total_values = ReadValues(batch_size, values); + for (int64_t i = 0; i < total_values; i++) { + ::arrow::BitUtil::SetBit(valid_bits, valid_bits_offset + i); + } + *null_count_out = 0; + *levels_read = total_values; + } -ColumnReader::~ColumnReader() {} + ConsumeBufferedValues(*levels_read); + return total_values; +} template -void TypedColumnReader::ConfigureDictionary(const DictionaryPage* page) { +int64_t TypedColumnReaderImpl::Skip(int64_t num_rows_to_skip) { + int64_t rows_to_skip = num_rows_to_skip; + while (HasNext() && rows_to_skip > 0) { + // If the number of rows to skip is more than the number of undecoded values, skip the + // Page. + if (rows_to_skip > (num_buffered_values_ - num_decoded_values_)) { + rows_to_skip -= num_buffered_values_ - num_decoded_values_; + num_decoded_values_ = num_buffered_values_; + } else { + // We need to read this Page + // Jump to the right offset in the Page + int64_t batch_size = 1024; // ReadBatch with a smaller memory footprint + int64_t values_read = 0; + + std::shared_ptr vals = AllocateBuffer( + this->pool_, batch_size * type_traits::value_byte_size); + std::shared_ptr def_levels = + AllocateBuffer(this->pool_, batch_size * sizeof(int16_t)); + + std::shared_ptr rep_levels = + AllocateBuffer(this->pool_, batch_size * sizeof(int16_t)); + + do { + batch_size = std::min(batch_size, rows_to_skip); + values_read = ReadBatch(static_cast(batch_size), + reinterpret_cast(def_levels->mutable_data()), + reinterpret_cast(rep_levels->mutable_data()), + reinterpret_cast(vals->mutable_data()), &values_read); + rows_to_skip -= values_read; + } while (values_read > 0 && rows_to_skip > 0); + } + } + return num_rows_to_skip - rows_to_skip; +} + +template +void TypedColumnReaderImpl::ConfigureDictionary(const DictionaryPage* page) { int encoding = static_cast(page->encoding()); if (page->encoding() == Encoding::PLAIN_DICTIONARY || page->encoding() == Encoding::PLAIN) { @@ -290,18 +573,17 @@ void TypedColumnReader::ConfigureDictionary(const DictionaryPage* page) { if (page->encoding() == Encoding::PLAIN_DICTIONARY || page->encoding() == Encoding::PLAIN) { - PlainDecoder dictionary(descr_); - dictionary.SetData(page->num_values(), page->data(), page->size()); + auto dictionary = MakeTypedDecoder(Encoding::PLAIN, descr_); + dictionary->SetData(page->num_values(), page->data(), page->size()); - // The dictionary is fully decoded during DictionaryDecoder::Init, so the + // The dictionary is fully decoded during SetData, so the // DictionaryPage buffer is no longer required after this step // // TODO(wesm): investigate whether this all-or-nothing decoding of the // dictionary makes sense and whether performance can be improved - - auto decoder = std::make_shared>(descr_, pool_); - decoder->SetDict(&dictionary); 
-    decoders_[encoding] = decoder;
+    auto decoder = MakeDictDecoder<DType>(descr_, pool_);
+    decoder->SetDict(dictionary.get());
+    decoders_[encoding] = std::move(decoder);
   } else {
     ParquetException::NYI("only plain dictionary encoding has been implemented");
   }
 
@@ -316,7 +598,7 @@ static bool IsDictionaryIndexEncoding(const Encoding::type& e) {
 }
 
 template <typename DType>
-bool TypedColumnReader<DType>::ReadNewPage() {
+bool TypedColumnReaderImpl<DType>::ReadNewPage() {
   // Loop until we find the next data page.
   const uint8_t* buffer;
 
@@ -385,9 +667,9 @@
   } else {
     switch (encoding) {
       case Encoding::PLAIN: {
-        std::shared_ptr<DecoderType> decoder(new PlainDecoder<DType>(descr_));
-        decoders_[static_cast<int>(encoding)] = decoder;
+        auto decoder = MakeTypedDecoder<DType>(Encoding::PLAIN, descr_);
         current_decoder_ = decoder.get();
+        decoders_[static_cast<int>(encoding)] = std::move(decoder);
         break;
       }
       case Encoding::RLE_DICTIONARY:
@@ -414,23 +696,6 @@
   return true;
 }
 
-// ----------------------------------------------------------------------
-// Batch read APIs
-
-int64_t ColumnReader::ReadDefinitionLevels(int64_t batch_size, int16_t* levels) {
-  if (descr_->max_definition_level() == 0) {
-    return 0;
-  }
-  return definition_level_decoder_.Decode(static_cast<int>(batch_size), levels);
-}
-
-int64_t ColumnReader::ReadRepetitionLevels(int64_t batch_size, int16_t* levels) {
-  if (descr_->max_repetition_level() == 0) {
-    return 0;
-  }
-  return repetition_level_decoder_.Decode(static_cast<int>(batch_size), levels);
-}
-
 // ----------------------------------------------------------------------
 // Dynamic column reader constructor
 
@@ -439,21 +704,29 @@ std::shared_ptr<ColumnReader> ColumnReader::Make(const ColumnDescriptor* descr,
                                                  MemoryPool* pool) {
   switch (descr->physical_type()) {
     case Type::BOOLEAN:
-      return std::make_shared<BoolReader>(descr, std::move(pager), pool);
+      return std::make_shared<TypedColumnReaderImpl<BooleanType>>(descr, std::move(pager),
+                                                                  pool);
     case Type::INT32:
-      return std::make_shared<Int32Reader>(descr, std::move(pager), pool);
+      return std::make_shared<TypedColumnReaderImpl<Int32Type>>(descr, std::move(pager),
+                                                                pool);
     case Type::INT64:
-      return std::make_shared<Int64Reader>(descr, std::move(pager), pool);
+      return std::make_shared<TypedColumnReaderImpl<Int64Type>>(descr, std::move(pager),
                                                                 pool);
     case Type::INT96:
-      return std::make_shared<Int96Reader>(descr, std::move(pager), pool);
+      return std::make_shared<TypedColumnReaderImpl<Int96Type>>(descr, std::move(pager),
+                                                                pool);
    case Type::FLOAT:
-      return std::make_shared<FloatReader>(descr, std::move(pager), pool);
+      return std::make_shared<TypedColumnReaderImpl<FloatType>>(descr, std::move(pager),
+                                                                pool);
    case Type::DOUBLE:
-      return std::make_shared<DoubleReader>(descr, std::move(pager), pool);
+      return std::make_shared<TypedColumnReaderImpl<DoubleType>>(descr, std::move(pager),
+                                                                 pool);
    case Type::BYTE_ARRAY:
-      return std::make_shared<ByteArrayReader>(descr, std::move(pager), pool);
+      return std::make_shared<TypedColumnReaderImpl<ByteArrayType>>(
+          descr, std::move(pager), pool);
    case Type::FIXED_LEN_BYTE_ARRAY:
-      return std::make_shared<FixedLenByteArrayReader>(descr, std::move(pager), pool);
+      return std::make_shared<TypedColumnReaderImpl<FLBAType>>(descr, std::move(pager),
+                                                               pool);
    default:
      ParquetException::NYI("type reader not implemented");
  }
@@ -461,16 +734,4 @@
   return std::shared_ptr<ColumnReader>(nullptr);
 }
 
-// ----------------------------------------------------------------------
-// Instantiate templated classes
-
-template class PARQUET_TEMPLATE_EXPORT TypedColumnReader<BooleanType>;
-template class PARQUET_TEMPLATE_EXPORT TypedColumnReader<Int32Type>;
-template class PARQUET_TEMPLATE_EXPORT TypedColumnReader<Int64Type>;
-template class PARQUET_TEMPLATE_EXPORT TypedColumnReader<Int96Type>;
-template class PARQUET_TEMPLATE_EXPORT TypedColumnReader<FloatType>;
-template class PARQUET_TEMPLATE_EXPORT TypedColumnReader<DoubleType>;
-template class PARQUET_TEMPLATE_EXPORT TypedColumnReader; -template class PARQUET_TEMPLATE_EXPORT TypedColumnReader; - } // namespace parquet diff --git a/cpp/src/parquet/column_reader.h b/cpp/src/parquet/column_reader.h index 960f2107dfa09..577107deaaa3a 100644 --- a/cpp/src/parquet/column_reader.h +++ b/cpp/src/parquet/column_reader.h @@ -15,30 +15,23 @@ // specific language governing permissions and limitations // under the License. -#ifndef PARQUET_COLUMN_READER_H -#define PARQUET_COLUMN_READER_H +#pragma once #include -#include #include -#include -#include #include #include #include -#include -#include -#include -#include -#include +#include "arrow/buffer.h" +#include "arrow/memory_pool.h" +#include "arrow/util/bit-util.h" +#include "arrow/util/macros.h" -#include "parquet/column_page.h" #include "parquet/encoding.h" #include "parquet/exception.h" #include "parquet/schema.h" #include "parquet/types.h" -#include "parquet/util/macros.h" #include "parquet/util/memory.h" #include "parquet/util/visibility.h" @@ -56,6 +49,9 @@ class RleDecoder; namespace parquet { +class DictionaryPage; +class Page; + // 16 MB is the default maximum page header size static constexpr uint32_t kDefaultMaxPageHeaderSize = 16 * 1024 * 1024; @@ -105,125 +101,26 @@ class PARQUET_EXPORT PageReader { class PARQUET_EXPORT ColumnReader { public: - ColumnReader(const ColumnDescriptor*, std::unique_ptr, - ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()); - virtual ~ColumnReader(); + virtual ~ColumnReader() = default; static std::shared_ptr Make( const ColumnDescriptor* descr, std::unique_ptr pager, ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()); // Returns true if there are still values in this column. - bool HasNext() { - // Either there is no data page available yet, or the data page has been - // exhausted - if (num_buffered_values_ == 0 || num_decoded_values_ == num_buffered_values_) { - if (!ReadNewPage() || num_buffered_values_ == 0) { - return false; - } - } - return true; - } - - Type::type type() const { return descr_->physical_type(); } - - const ColumnDescriptor* descr() const { return descr_; } - - protected: - virtual bool ReadNewPage() = 0; - - // Read multiple definition levels into preallocated memory - // - // Returns the number of decoded definition levels - int64_t ReadDefinitionLevels(int64_t batch_size, int16_t* levels); - - // Read multiple repetition levels into preallocated memory - // Returns the number of decoded repetition levels - int64_t ReadRepetitionLevels(int64_t batch_size, int16_t* levels); - - int64_t available_values_current_page() const { - return num_buffered_values_ - num_decoded_values_; - } - - void ConsumeBufferedValues(int64_t num_values) { num_decoded_values_ += num_values; } - - const ColumnDescriptor* descr_; - - std::unique_ptr pager_; - std::shared_ptr current_page_; - - // Not set if full schema for this field has no optional or repeated elements - LevelDecoder definition_level_decoder_; - - // Not set for flat schemas. - LevelDecoder repetition_level_decoder_; - - // The total number of values stored in the data page. This is the maximum of - // the number of encoded definition levels or encoded values. For - // non-repeated, required columns, this is equal to the number of encoded - // values. For repeated or optional values, there may be fewer data values - // than levels, and this tells you how many encoded levels there are in that - // case. 
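  // (Worked example: a data page of an OPTIONAL column holding [1, null, 2]
  // stores three definition levels but only two encoded values, so this
  // count is 3.)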
- int64_t num_buffered_values_; + virtual bool HasNext() = 0; - // The number of values from the current data page that have been decoded - // into memory - int64_t num_decoded_values_; + virtual Type::type type() const = 0; - ::arrow::MemoryPool* pool_; + virtual const ColumnDescriptor* descr() const = 0; }; -namespace internal { - -static inline void DefinitionLevelsToBitmap( - const int16_t* def_levels, int64_t num_def_levels, const int16_t max_definition_level, - const int16_t max_repetition_level, int64_t* values_read, int64_t* null_count, - uint8_t* valid_bits, const int64_t valid_bits_offset) { - ::arrow::internal::BitmapWriter valid_bits_writer(valid_bits, valid_bits_offset, - num_def_levels); - - // TODO(itaiin): As an interim solution we are splitting the code path here - // between repeated+flat column reads, and non-repeated+nested reads. - // Those paths need to be merged in the future - for (int i = 0; i < num_def_levels; ++i) { - if (def_levels[i] == max_definition_level) { - valid_bits_writer.Set(); - } else if (max_repetition_level > 0) { - // repetition+flat case - if (def_levels[i] == (max_definition_level - 1)) { - valid_bits_writer.Clear(); - *null_count += 1; - } else { - continue; - } - } else { - // non-repeated+nested case - if (def_levels[i] < max_definition_level) { - valid_bits_writer.Clear(); - *null_count += 1; - } else { - throw ParquetException("definition level exceeds maximum"); - } - } - - valid_bits_writer.Next(); - } - valid_bits_writer.Finish(); - *values_read = valid_bits_writer.position(); -} - -} // namespace internal - // API to read values from a single column. This is a main client facing API. template -class PARQUET_TEMPLATE_CLASS_EXPORT TypedColumnReader : public ColumnReader { +class TypedColumnReader : public ColumnReader { public: typedef typename DType::c_type T; - TypedColumnReader(const ColumnDescriptor* schema, std::unique_ptr pager, - ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) - : ColumnReader(schema, std::move(pager), pool), current_decoder_(NULLPTR) {} - // Read a batch of repetition levels, definition levels, and values from the // column. // @@ -241,8 +138,8 @@ class PARQUET_TEMPLATE_CLASS_EXPORT TypedColumnReader : public ColumnReader { // This API is the same for both V1 and V2 of the DataPage // // @returns: actual number of levels read (see values_read for number of values read) - int64_t ReadBatch(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, - T* values, int64_t* values_read); + virtual int64_t ReadBatch(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, + T* values, int64_t* values_read) = 0; /// Read a batch of repetition levels, definition levels, and values from the /// column and leave spaces for null entries on the lowest level in the values @@ -278,114 +175,59 @@ class PARQUET_TEMPLATE_CLASS_EXPORT TypedColumnReader : public ColumnReader { /// (i.e. definition_level == max_definition_level - 1) /// @param[out] null_count The number of nulls on the lowest levels. /// (i.e. 
(values_read - null_count) is total number of non-null entries) - int64_t ReadBatchSpaced(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, - T* values, uint8_t* valid_bits, int64_t valid_bits_offset, - int64_t* levels_read, int64_t* values_read, - int64_t* null_count); + virtual int64_t ReadBatchSpaced(int64_t batch_size, int16_t* def_levels, + int16_t* rep_levels, T* values, uint8_t* valid_bits, + int64_t valid_bits_offset, int64_t* levels_read, + int64_t* values_read, int64_t* null_count) = 0; // Skip reading levels // Returns the number of levels skipped - int64_t Skip(int64_t num_rows_to_skip); - - private: - typedef Decoder DecoderType; - - // Advance to the next data page - bool ReadNewPage() override; - - // Read up to batch_size values from the current data page into the - // pre-allocated memory T* - // - // @returns: the number of values read into the out buffer - int64_t ReadValues(int64_t batch_size, T* out); - - // Read up to batch_size values from the current data page into the - // pre-allocated memory T*, leaving spaces for null entries according - // to the def_levels. - // - // @returns: the number of values read into the out buffer - int64_t ReadValuesSpaced(int64_t batch_size, T* out, int64_t null_count, - uint8_t* valid_bits, int64_t valid_bits_offset); - - // Map of encoding type to the respective decoder object. For example, a - // column chunk's data pages may include both dictionary-encoded and - // plain-encoded data. - std::unordered_map> decoders_; - - void ConfigureDictionary(const DictionaryPage* page); - - DecoderType* current_decoder_; + virtual int64_t Skip(int64_t num_rows_to_skip) = 0; }; -// ---------------------------------------------------------------------- -// Type column reader implementations - -template -inline int64_t TypedColumnReader::ReadValues(int64_t batch_size, T* out) { - int64_t num_decoded = current_decoder_->Decode(out, static_cast(batch_size)); - return num_decoded; -} - -template -inline int64_t TypedColumnReader::ReadValuesSpaced(int64_t batch_size, T* out, - int64_t null_count, - uint8_t* valid_bits, - int64_t valid_bits_offset) { - return current_decoder_->DecodeSpaced(out, static_cast(batch_size), - static_cast(null_count), valid_bits, - valid_bits_offset); -} - -template -inline int64_t TypedColumnReader::ReadBatch(int64_t batch_size, - int16_t* def_levels, - int16_t* rep_levels, T* values, - int64_t* values_read) { - // HasNext invokes ReadNewPage - if (!HasNext()) { - *values_read = 0; - return 0; - } - - // TODO(wesm): keep reading data pages until batch_size is reached, or the - // row group is finished - batch_size = std::min(batch_size, num_buffered_values_ - num_decoded_values_); - - int64_t num_def_levels = 0; - int64_t num_rep_levels = 0; +namespace internal { - int64_t values_to_read = 0; +static inline void DefinitionLevelsToBitmap( + const int16_t* def_levels, int64_t num_def_levels, const int16_t max_definition_level, + const int16_t max_repetition_level, int64_t* values_read, int64_t* null_count, + uint8_t* valid_bits, int64_t valid_bits_offset) { + // We assume here that valid_bits is large enough to accommodate the + // additional definition levels and the ones that have already been written + ::arrow::internal::BitmapWriter valid_bits_writer(valid_bits, valid_bits_offset, + valid_bits_offset + num_def_levels); - // If the field is required and non-repeated, there are no definition levels - if (descr_->max_definition_level() > 0 && def_levels) { - num_def_levels = ReadDefinitionLevels(batch_size, 
def_levels); - // TODO(wesm): this tallying of values-to-decode can be performed with better - // cache-efficiency if fused with the level decoding. - for (int64_t i = 0; i < num_def_levels; ++i) { - if (def_levels[i] == descr_->max_definition_level()) { - ++values_to_read; + // TODO(itaiin): As an interim solution we are splitting the code path here + // between repeated+flat column reads, and non-repeated+nested reads. + // Those paths need to be merged in the future + for (int i = 0; i < num_def_levels; ++i) { + if (def_levels[i] == max_definition_level) { + valid_bits_writer.Set(); + } else if (max_repetition_level > 0) { + // repetition+flat case + if (def_levels[i] == (max_definition_level - 1)) { + valid_bits_writer.Clear(); + *null_count += 1; + } else { + continue; + } + } else { + // non-repeated+nested case + if (def_levels[i] < max_definition_level) { + valid_bits_writer.Clear(); + *null_count += 1; + } else { + throw ParquetException("definition level exceeds maximum"); } } - } else { - // Required field, read all values - values_to_read = batch_size; - } - // Not present for non-repeated fields - if (descr_->max_repetition_level() > 0 && rep_levels) { - num_rep_levels = ReadRepetitionLevels(batch_size, rep_levels); - if (def_levels && num_def_levels != num_rep_levels) { - throw ParquetException("Number of decoded rep / def levels did not match"); - } + valid_bits_writer.Next(); } - - *values_read = ReadValues(values_to_read, values); - int64_t total_values = std::max(num_def_levels, *values_read); - ConsumeBufferedValues(total_values); - - return total_values; + valid_bits_writer.Finish(); + *values_read = valid_bits_writer.position(); } +} // namespace internal + namespace internal { // TODO(itaiin): another code path split to merge when the general case is done @@ -409,134 +251,13 @@ static inline bool HasSpacedValues(const ColumnDescriptor* descr) { } // namespace internal -template -inline int64_t TypedColumnReader::ReadBatchSpaced( - int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, T* values, - uint8_t* valid_bits, int64_t valid_bits_offset, int64_t* levels_read, - int64_t* values_read, int64_t* null_count_out) { - // HasNext invokes ReadNewPage - if (!HasNext()) { - *levels_read = 0; - *values_read = 0; - *null_count_out = 0; - return 0; - } - - int64_t total_values; - // TODO(wesm): keep reading data pages until batch_size is reached, or the - // row group is finished - batch_size = std::min(batch_size, num_buffered_values_ - num_decoded_values_); - - // If the field is required and non-repeated, there are no definition levels - if (descr_->max_definition_level() > 0) { - int64_t num_def_levels = ReadDefinitionLevels(batch_size, def_levels); - - // Not present for non-repeated fields - if (descr_->max_repetition_level() > 0) { - int64_t num_rep_levels = ReadRepetitionLevels(batch_size, rep_levels); - if (num_def_levels != num_rep_levels) { - throw ParquetException("Number of decoded rep / def levels did not match"); - } - } - - const bool has_spaced_values = internal::HasSpacedValues(descr_); - - int64_t null_count = 0; - if (!has_spaced_values) { - int values_to_read = 0; - for (int64_t i = 0; i < num_def_levels; ++i) { - if (def_levels[i] == descr_->max_definition_level()) { - ++values_to_read; - } - } - total_values = ReadValues(values_to_read, values); - for (int64_t i = 0; i < total_values; i++) { - ::arrow::BitUtil::SetBit(valid_bits, valid_bits_offset + i); - } - *values_read = total_values; - } else { - int16_t max_definition_level = 
descr_->max_definition_level(); - int16_t max_repetition_level = descr_->max_repetition_level(); - internal::DefinitionLevelsToBitmap(def_levels, num_def_levels, max_definition_level, - max_repetition_level, values_read, &null_count, - valid_bits, valid_bits_offset); - total_values = ReadValuesSpaced(*values_read, values, static_cast(null_count), - valid_bits, valid_bits_offset); - } - *levels_read = num_def_levels; - *null_count_out = null_count; - - } else { - // Required field, read all values - total_values = ReadValues(batch_size, values); - for (int64_t i = 0; i < total_values; i++) { - ::arrow::BitUtil::SetBit(valid_bits, valid_bits_offset + i); - } - *null_count_out = 0; - *levels_read = total_values; - } - - ConsumeBufferedValues(*levels_read); - return total_values; -} - -template -int64_t TypedColumnReader::Skip(int64_t num_rows_to_skip) { - int64_t rows_to_skip = num_rows_to_skip; - while (HasNext() && rows_to_skip > 0) { - // If the number of rows to skip is more than the number of undecoded values, skip the - // Page. - if (rows_to_skip > (num_buffered_values_ - num_decoded_values_)) { - rows_to_skip -= num_buffered_values_ - num_decoded_values_; - num_decoded_values_ = num_buffered_values_; - } else { - // We need to read this Page - // Jump to the right offset in the Page - int64_t batch_size = 1024; // ReadBatch with a smaller memory footprint - int64_t values_read = 0; - - std::shared_ptr vals = AllocateBuffer( - this->pool_, batch_size * type_traits::value_byte_size); - std::shared_ptr def_levels = - AllocateBuffer(this->pool_, batch_size * sizeof(int16_t)); - - std::shared_ptr rep_levels = - AllocateBuffer(this->pool_, batch_size * sizeof(int16_t)); - - do { - batch_size = std::min(batch_size, rows_to_skip); - values_read = ReadBatch(static_cast(batch_size), - reinterpret_cast(def_levels->mutable_data()), - reinterpret_cast(rep_levels->mutable_data()), - reinterpret_cast(vals->mutable_data()), &values_read); - rows_to_skip -= values_read; - } while (values_read > 0 && rows_to_skip > 0); - } - } - return num_rows_to_skip - rows_to_skip; -} - -// ---------------------------------------------------------------------- -// Template instantiations - -typedef TypedColumnReader BoolReader; -typedef TypedColumnReader Int32Reader; -typedef TypedColumnReader Int64Reader; -typedef TypedColumnReader Int96Reader; -typedef TypedColumnReader FloatReader; -typedef TypedColumnReader DoubleReader; -typedef TypedColumnReader ByteArrayReader; -typedef TypedColumnReader FixedLenByteArrayReader; - -PARQUET_EXTERN_TEMPLATE TypedColumnReader; -PARQUET_EXTERN_TEMPLATE TypedColumnReader; -PARQUET_EXTERN_TEMPLATE TypedColumnReader; -PARQUET_EXTERN_TEMPLATE TypedColumnReader; -PARQUET_EXTERN_TEMPLATE TypedColumnReader; -PARQUET_EXTERN_TEMPLATE TypedColumnReader; -PARQUET_EXTERN_TEMPLATE TypedColumnReader; -PARQUET_EXTERN_TEMPLATE TypedColumnReader; +using BoolReader = TypedColumnReader; +using Int32Reader = TypedColumnReader; +using Int64Reader = TypedColumnReader; +using Int96Reader = TypedColumnReader; +using FloatReader = TypedColumnReader; +using DoubleReader = TypedColumnReader; +using ByteArrayReader = TypedColumnReader; +using FixedLenByteArrayReader = TypedColumnReader; } // namespace parquet - -#endif // PARQUET_COLUMN_READER_H diff --git a/cpp/src/parquet/column_scanner.cc b/cpp/src/parquet/column_scanner.cc index 51c87732959fb..8011318a78c9a 100644 --- a/cpp/src/parquet/column_scanner.cc +++ b/cpp/src/parquet/column_scanner.cc @@ -21,7 +21,6 @@ #include #include "parquet/column_reader.h" 
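For orientation, a minimal caller-side sketch (not part of the patch) of the reworked reader API above: ReadBatch, ReadBatchSpaced and Skip are now pure virtual on TypedColumnReader, and concrete readers are obtained through the ColumnReader::Make factory and downcast to a typed alias such as Int32Reader. The descriptor and page reader are assumed to come from the usual row-group machinery.

#include <cstdint>
#include <memory>
#include <vector>

#include "parquet/column_reader.h"

int64_t CountInt32Values(const parquet::ColumnDescriptor* descr,
                         std::unique_ptr<parquet::PageReader> pager) {
  // ColumnReader::Make returns the abstract interface; downcast to the typed alias.
  auto reader = std::static_pointer_cast<parquet::Int32Reader>(
      parquet::ColumnReader::Make(descr, std::move(pager)));
  std::vector<int32_t> values(1024);
  std::vector<int16_t> def_levels(1024);
  std::vector<int16_t> rep_levels(1024);
  int64_t total = 0;
  while (reader->HasNext()) {
    int64_t values_read = 0;
    // ReadBatch returns the number of levels read; values_read reports
    // the number of decoded (non-null) values.
    reader->ReadBatch(1024, def_levels.data(), rep_levels.data(), values.data(),
                      &values_read);
    total += values_read;
  }
  return total;
}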
-#include "parquet/util/memory.h" using arrow::MemoryPool; diff --git a/cpp/src/parquet/column_scanner.h b/cpp/src/parquet/column_scanner.h index f23c86173cb32..cb0da2c9e18f1 100644 --- a/cpp/src/parquet/column_scanner.h +++ b/cpp/src/parquet/column_scanner.h @@ -25,11 +25,13 @@ #include #include +#include "arrow/buffer.h" +#include "arrow/memory_pool.h" + #include "parquet/column_reader.h" #include "parquet/exception.h" #include "parquet/schema.h" #include "parquet/types.h" -#include "parquet/util/macros.h" #include "parquet/util/memory.h" #include "parquet/util/visibility.h" @@ -87,7 +89,7 @@ class PARQUET_EXPORT Scanner { }; template -class PARQUET_EXPORT TypedScanner : public Scanner { +class PARQUET_TEMPLATE_CLASS_EXPORT TypedScanner : public Scanner { public: typedef typename DType::c_type T; diff --git a/cpp/src/parquet/column_writer-test.cc b/cpp/src/parquet/column_writer-test.cc index b81f3ed8152b6..5db7a495e5a26 100644 --- a/cpp/src/parquet/column_writer-test.cc +++ b/cpp/src/parquet/column_writer-test.cc @@ -17,8 +17,12 @@ #include +#include + #include "parquet/column_reader.h" #include "parquet/column_writer.h" +#include "parquet/metadata.h" +#include "parquet/properties.h" #include "parquet/test-specialization.h" #include "parquet/test-util.h" #include "parquet/thrift.h" @@ -28,6 +32,7 @@ namespace parquet { +using schema::GroupNode; using schema::NodePtr; using schema::PrimitiveNode; @@ -40,11 +45,15 @@ const int SMALL_SIZE = 100; const int LARGE_SIZE = 10000; // Very large size to test dictionary fallback. const int VERY_LARGE_SIZE = 40000; +// Reduced dictionary page size to use for testing dictionary fallback with valgrind +const int64_t DICTIONARY_PAGE_SIZE = 1024; #else // Larger size to test some corner cases, only used in some specific cases. const int LARGE_SIZE = 100000; // Very large size to test dictionary fallback. 
const int VERY_LARGE_SIZE = 400000; +// Dictionary page size to use for testing dictionary fallback +const int64_t DICTIONARY_PAGE_SIZE = 1024 * 1024; #endif template @@ -71,17 +80,21 @@ class TestPrimitiveWriter : public PrimitiveTypedTest { std::unique_ptr source(new InMemoryInputStream(buffer)); std::unique_ptr page_reader = PageReader::Open(std::move(source), num_rows, compression); - reader_.reset(new TypedColumnReader(this->descr_, std::move(page_reader))); + reader_ = std::static_pointer_cast>( + ColumnReader::Make(this->descr_, std::move(page_reader))); } std::shared_ptr> BuildWriter( int64_t output_size = SMALL_SIZE, - const ColumnProperties& column_properties = ColumnProperties()) { + const ColumnProperties& column_properties = ColumnProperties(), + const ParquetVersion::type version = ParquetVersion::PARQUET_1_0) { sink_.reset(new InMemoryOutputStream()); WriterProperties::Builder wp_builder; + wp_builder.version(version); if (column_properties.encoding() == Encoding::PLAIN_DICTIONARY || column_properties.encoding() == Encoding::RLE_DICTIONARY) { wp_builder.enable_dictionary(); + wp_builder.dictionary_pagesize_limit(DICTIONARY_PAGE_SIZE); } else { wp_builder.disable_dictionary(); wp_builder.encoding(column_properties.encoding()); @@ -125,6 +138,50 @@ class TestPrimitiveWriter : public PrimitiveTypedTest { ASSERT_NO_FATAL_FAILURE(this->ReadAndCompare(compression, num_rows)); } + void TestDictionaryFallbackEncoding(ParquetVersion::type version) { + this->GenerateData(VERY_LARGE_SIZE); + ColumnProperties column_properties; + column_properties.set_dictionary_enabled(true); + + if (version == ParquetVersion::PARQUET_1_0) { + column_properties.set_encoding(Encoding::PLAIN_DICTIONARY); + } else { + column_properties.set_encoding(Encoding::RLE_DICTIONARY); + } + + auto writer = this->BuildWriter(VERY_LARGE_SIZE, column_properties, version); + + writer->WriteBatch(this->values_.size(), nullptr, nullptr, this->values_ptr_); + writer->Close(); + + // Read all rows so we are sure that also the non-dictionary pages are read correctly + this->SetupValuesOut(VERY_LARGE_SIZE); + this->ReadColumnFully(); + ASSERT_EQ(VERY_LARGE_SIZE, this->values_read_); + this->values_.resize(VERY_LARGE_SIZE); + ASSERT_EQ(this->values_, this->values_out_); + std::vector encodings = this->metadata_encodings(); + + if (this->type_num() == Type::BOOLEAN) { + // Dictionary encoding is not allowed for boolean type + // There are 2 encodings (PLAIN, RLE) in a non dictionary encoding case + std::vector expected({Encoding::PLAIN, Encoding::RLE}); + ASSERT_EQ(encodings, expected); + } else if (version == ParquetVersion::PARQUET_1_0) { + // There are 4 encodings (PLAIN_DICTIONARY, PLAIN, RLE, PLAIN) in a fallback case + // for version 1.0 + std::vector expected( + {Encoding::PLAIN_DICTIONARY, Encoding::PLAIN, Encoding::RLE, Encoding::PLAIN}); + ASSERT_EQ(encodings, expected); + } else { + // There are 4 encodings (RLE_DICTIONARY, PLAIN, RLE, PLAIN) in a fallback case for + // version 2.0 + std::vector expected( + {Encoding::RLE_DICTIONARY, Encoding::PLAIN, Encoding::RLE, Encoding::PLAIN}); + ASSERT_EQ(encodings, expected); + } + } + void WriteRequiredWithSettings(Encoding::type encoding, Compression::type compression, bool enable_dictionary, bool enable_statistics, int64_t num_rows) { @@ -204,7 +261,7 @@ class TestPrimitiveWriter : public PrimitiveTypedTest { int64_t values_read_; // Keep the reader alive as for ByteArray the lifetime of the ByteArray // content is bound to the reader. 
- std::unique_ptr> reader_; + std::shared_ptr> reader_; std::vector definition_levels_out_; std::vector repetition_levels_out_; @@ -346,11 +403,6 @@ TYPED_TEST(TestPrimitiveWriter, RequiredPlainWithLz4Compression) { LARGE_SIZE); } -TYPED_TEST(TestPrimitiveWriter, RequiredPlainWithZstdCompression) { - this->TestRequiredWithSettings(Encoding::PLAIN, Compression::ZSTD, false, false, - LARGE_SIZE); -} - TYPED_TEST(TestPrimitiveWriter, RequiredPlainWithStats) { this->TestRequiredWithSettings(Encoding::PLAIN, Compression::UNCOMPRESSED, false, true, LARGE_SIZE); @@ -376,10 +428,19 @@ TYPED_TEST(TestPrimitiveWriter, RequiredPlainWithStatsAndLz4Compression) { LARGE_SIZE); } +// The ExternalProject for zstd does not build on CMake < 3.7, so we do not +// require it here +#ifdef ARROW_WITH_ZSTD +TYPED_TEST(TestPrimitiveWriter, RequiredPlainWithZstdCompression) { + this->TestRequiredWithSettings(Encoding::PLAIN, Compression::ZSTD, false, false, + LARGE_SIZE); +} + TYPED_TEST(TestPrimitiveWriter, RequiredPlainWithStatsAndZstdCompression) { this->TestRequiredWithSettings(Encoding::PLAIN, Compression::ZSTD, false, true, LARGE_SIZE); } +#endif TYPED_TEST(TestPrimitiveWriter, Optional) { // Optional and non-repeated, with definition levels @@ -471,32 +532,13 @@ TYPED_TEST(TestPrimitiveWriter, RequiredLargeChunk) { ASSERT_EQ(this->values_, this->values_out_); } -// Test case for dictionary fallback encoding -TYPED_TEST(TestPrimitiveWriter, RequiredVeryLargeChunk) { - this->GenerateData(VERY_LARGE_SIZE); - - auto writer = this->BuildWriter(VERY_LARGE_SIZE, Encoding::PLAIN_DICTIONARY); - writer->WriteBatch(this->values_.size(), nullptr, nullptr, this->values_ptr_); - writer->Close(); +// Test cases for dictionary fallback encoding +TYPED_TEST(TestPrimitiveWriter, DictionaryFallbackVersion1_0) { + this->TestDictionaryFallbackEncoding(ParquetVersion::PARQUET_1_0); +} - // Read all rows so we are sure that also the non-dictionary pages are read correctly - this->SetupValuesOut(VERY_LARGE_SIZE); - this->ReadColumnFully(); - ASSERT_EQ(VERY_LARGE_SIZE, this->values_read_); - this->values_.resize(VERY_LARGE_SIZE); - ASSERT_EQ(this->values_, this->values_out_); - std::vector encodings = this->metadata_encodings(); - // There are 3 encodings (RLE, PLAIN_DICTIONARY, PLAIN) in a fallback case - // Dictionary encoding is not allowed for boolean type - // There are 2 encodings (RLE, PLAIN) in a non dictionary encoding case - if (this->type_num() != Type::BOOLEAN) { - ASSERT_EQ(Encoding::PLAIN_DICTIONARY, encodings[0]); - ASSERT_EQ(Encoding::PLAIN, encodings[1]); - ASSERT_EQ(Encoding::RLE, encodings[2]); - } else { - ASSERT_EQ(Encoding::PLAIN, encodings[0]); - ASSERT_EQ(Encoding::RLE, encodings[1]); - } +TYPED_TEST(TestPrimitiveWriter, DictionaryFallbackVersion2_0) { + this->TestDictionaryFallbackEncoding(ParquetVersion::PARQUET_2_0); } // PARQUET-719 @@ -581,6 +623,52 @@ TEST_F(TestByteArrayValuesWriter, CheckDefaultStats) { ASSERT_TRUE(this->metadata_is_stats_set()); } +TEST(TestColumnWriter, RepeatedListsUpdateSpacedBug) { + // In ARROW-3930 we discovered a bug when writing from Arrow when we had data + // that looks like this: + // + // [null, [0, 1, null, 2, 3, 4, null]] + + // Create schema + NodePtr item = schema::Int32("item"); // optional item + NodePtr list(GroupNode::Make("b", Repetition::REPEATED, {item}, LogicalType::LIST)); + NodePtr bag(GroupNode::Make("bag", Repetition::OPTIONAL, {list})); // optional list + std::vector fields = {bag}; + NodePtr root = GroupNode::Make("schema", Repetition::REPEATED, 
fields); + + SchemaDescriptor schema; + schema.Init(root); + + InMemoryOutputStream sink; + auto props = WriterProperties::Builder().build(); + + auto metadata = ColumnChunkMetaDataBuilder::Make(props, schema.Column(0)); + std::unique_ptr pager = + PageWriter::Open(&sink, Compression::UNCOMPRESSED, metadata.get()); + std::shared_ptr writer = + ColumnWriter::Make(metadata.get(), std::move(pager), props.get()); + auto typed_writer = std::static_pointer_cast>(writer); + + std::vector def_levels = {1, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3}; + std::vector rep_levels = {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + std::vector values = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + + // Write the values into uninitialized memory + std::shared_ptr values_buffer; + ASSERT_OK(::arrow::AllocateBuffer(64, &values_buffer)); + memcpy(values_buffer->mutable_data(), values.data(), 13 * sizeof(int32_t)); + auto values_data = reinterpret_cast(values_buffer->data()); + + std::shared_ptr valid_bits; + ASSERT_OK(::arrow::BitUtil::BytesToBits({1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1}, + ::arrow::default_memory_pool(), &valid_bits)); + + // valgrind will warn about out of bounds access into def_levels_data + typed_writer->WriteBatchSpaced(14, def_levels.data(), rep_levels.data(), + valid_bits->data(), 0, values_data); + writer->Close(); +} + void GenerateLevels(int min_repeat_factor, int max_repeat_factor, int max_level, std::vector& input_levels) { // for each repetition count upto max_repeat_factor diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index a45613f1b982c..0919a3f1d7a65 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -17,23 +17,32 @@ #include "parquet/column_writer.h" +#include #include #include #include +#include "arrow/status.h" +#include "arrow/util/bit-stream-utils.h" #include "arrow/util/bit-util.h" +#include "arrow/util/checked_cast.h" #include "arrow/util/compression.h" #include "arrow/util/logging.h" #include "arrow/util/rle-encoding.h" -#include "parquet/encoding-internal.h" +#include "parquet/metadata.h" #include "parquet/properties.h" #include "parquet/statistics.h" #include "parquet/thrift.h" +#include "parquet/types.h" #include "parquet/util/memory.h" namespace parquet { +namespace BitUtil = ::arrow::BitUtil; + +using ::arrow::internal::checked_cast; + using BitWriter = ::arrow::BitUtil::BitWriter; using RleEncoder = ::arrow::util::RleEncoder; @@ -141,6 +150,7 @@ class SerializedPageWriter : public PageWriter { total_uncompressed_size_(0), total_compressed_size_(0) { compressor_ = GetCodecFromArrow(codec); + thrift_serializer_.reset(new ThriftSerializer); } int64_t WriteDictionaryPage(const DictionaryPage& page) override { @@ -171,8 +181,7 @@ class SerializedPageWriter : public PageWriter { if (dictionary_page_offset_ == 0) { dictionary_page_offset_ = start_pos; } - int64_t header_size = - SerializeThriftMsg(&page_header, sizeof(format::PageHeader), sink_); + int64_t header_size = thrift_serializer_->Serialize(&page_header, sink_); sink_->Write(compressed_data->data(), compressed_data->size()); total_uncompressed_size_ += uncompressed_size + header_size; @@ -237,8 +246,7 @@ class SerializedPageWriter : public PageWriter { data_page_offset_ = start_pos; } - int64_t header_size = - SerializeThriftMsg(&page_header, sizeof(format::PageHeader), sink_); + int64_t header_size = thrift_serializer_->Serialize(&page_header, sink_); sink_->Write(compressed_data->data(), compressed_data->size()); total_uncompressed_size_ += 
uncompressed_size + header_size; @@ -270,6 +278,8 @@ class SerializedPageWriter : public PageWriter { int64_t total_uncompressed_size_; int64_t total_compressed_size_; + std::unique_ptr thrift_serializer_; + // Compression codec to use. std::unique_ptr<::arrow::util::Codec> compressor_; }; @@ -353,7 +363,6 @@ ColumnWriter::ColumnWriter(ColumnChunkMetaDataBuilder* metadata, encoding_(encoding), properties_(properties), allocator_(properties->memory_pool()), - pool_(properties->memory_pool()), num_buffered_values_(0), num_buffered_encoded_values_(0), rows_written_(0), @@ -534,24 +543,12 @@ void ColumnWriter::FlushBufferedDataPages() { template TypedColumnWriter::TypedColumnWriter(ColumnChunkMetaDataBuilder* metadata, std::unique_ptr pager, + const bool use_dictionary, Encoding::type encoding, const WriterProperties* properties) - : ColumnWriter(metadata, std::move(pager), - (encoding == Encoding::PLAIN_DICTIONARY || - encoding == Encoding::RLE_DICTIONARY), - encoding, properties) { - switch (encoding) { - case Encoding::PLAIN: - current_encoder_.reset(new PlainEncoder(descr_, properties->memory_pool())); - break; - case Encoding::PLAIN_DICTIONARY: - case Encoding::RLE_DICTIONARY: - current_encoder_.reset( - new DictEncoder(descr_, &pool_, properties->memory_pool())); - break; - default: - ParquetException::NYI("Selected encoding is not supported"); - } + : ColumnWriter(metadata, std::move(pager), use_dictionary, encoding, properties) { + current_encoder_ = MakeEncoder(Type::type_num, encoding, use_dictionary, descr_, + properties->memory_pool()); if (properties->statistics_enabled(descr_->path()) && (SortOrder::UNKNOWN != descr_->sort_order())) { @@ -564,29 +561,33 @@ TypedColumnWriter::TypedColumnWriter(ColumnChunkMetaDataBuilder* metadata, // Fallback to PLAIN if dictionary page limit is reached. 
 template <typename Type>
 void TypedColumnWriter<Type>::CheckDictionarySizeLimit() {
-  auto dict_encoder = static_cast<DictEncoder<Type>*>(current_encoder_.get());
+  // We have to use dynamic_cast here: TypedEncoder<Type> is a virtual base,
+  // and compilers do not allow a static_cast through virtual inheritance
+  auto dict_encoder = dynamic_cast<DictEncoder<Type>*>(current_encoder_.get());
   if (dict_encoder->dict_encoded_size() >= properties_->dictionary_pagesize_limit()) {
     WriteDictionaryPage();
     // Serialize the buffered Dictionary Indices
     FlushBufferedDataPages();
     fallback_ = true;
     // Only PLAIN encoding is supported for fallback in V1
-    current_encoder_.reset(new PlainEncoder<Type>(descr_, properties_->memory_pool()));
+    current_encoder_ = MakeEncoder(Type::type_num, Encoding::PLAIN, false, descr_,
+                                   properties_->memory_pool());
     encoding_ = Encoding::PLAIN;
   }
 }

 template <typename Type>
 void TypedColumnWriter<Type>::WriteDictionaryPage() {
-  auto dict_encoder = static_cast<DictEncoder<Type>*>(current_encoder_.get());
+  // We have to use dynamic_cast here: TypedEncoder<Type> is a virtual base,
+  // and compilers do not allow a static_cast through virtual inheritance
+  auto dict_encoder = dynamic_cast<DictEncoder<Type>*>(current_encoder_.get());
+  DCHECK(dict_encoder);
   std::shared_ptr<ResizableBuffer> buffer =
       AllocateBuffer(properties_->memory_pool(), dict_encoder->dict_encoded_size());
   dict_encoder->WriteDict(buffer->mutable_data());
-  // TODO Get rid of this deep call
-  dict_encoder->mem_pool()->FreeAll();

   DictionaryPage page(buffer, dict_encoder->num_entries(),
-                      properties_->dictionary_index_encoding());
+                      properties_->dictionary_page_encoding());
   total_bytes_written_ += pager_->WriteDictionaryPage(page);
 }

@@ -619,36 +620,37 @@ std::shared_ptr<ColumnWriter> ColumnWriter::Make(ColumnChunkMetaDataBuilder* met
                                                  std::unique_ptr<PageWriter> pager,
                                                  const WriterProperties* properties) {
   const ColumnDescriptor* descr = metadata->descr();
+  const bool use_dictionary = properties->dictionary_enabled(descr->path()) &&
+                              descr->physical_type() != Type::BOOLEAN;
   Encoding::type encoding = properties->encoding(descr->path());
-  if (properties->dictionary_enabled(descr->path()) &&
-      descr->physical_type() != Type::BOOLEAN) {
-    encoding = properties->dictionary_page_encoding();
+  if (use_dictionary) {
+    encoding = properties->dictionary_index_encoding();
   }
   switch (descr->physical_type()) {
     case Type::BOOLEAN:
-      return std::make_shared<BoolWriter>(metadata, std::move(pager), encoding,
-                                          properties);
+      return std::make_shared<BoolWriter>(metadata, std::move(pager), use_dictionary,
+                                          encoding, properties);
     case Type::INT32:
-      return std::make_shared<Int32Writer>(metadata, std::move(pager), encoding,
-                                           properties);
+      return std::make_shared<Int32Writer>(metadata, std::move(pager), use_dictionary,
+                                           encoding, properties);
     case Type::INT64:
-      return std::make_shared<Int64Writer>(metadata, std::move(pager), encoding,
-                                           properties);
+      return std::make_shared<Int64Writer>(metadata, std::move(pager), use_dictionary,
+                                           encoding, properties);
    case Type::INT96:
-      return std::make_shared<Int96Writer>(metadata, std::move(pager), encoding,
-                                           properties);
+      return std::make_shared<Int96Writer>(metadata, std::move(pager), use_dictionary,
+                                           encoding, properties);
    case Type::FLOAT:
-      return std::make_shared<FloatWriter>(metadata, std::move(pager), encoding,
-                                           properties);
+      return std::make_shared<FloatWriter>(metadata, std::move(pager), use_dictionary,
+                                           encoding, properties);
    case Type::DOUBLE:
-      return std::make_shared<DoubleWriter>(metadata, std::move(pager), encoding,
-                                            properties);
+      return std::make_shared<DoubleWriter>(metadata, std::move(pager), use_dictionary,
+                                            encoding, properties);
    case Type::BYTE_ARRAY:
-      return std::make_shared<ByteArrayWriter>(metadata, std::move(pager), encoding,
-                                               properties);
+      return std::make_shared<ByteArrayWriter>(metadata, std::move(pager),
+                                               use_dictionary, encoding,
properties); case Type::FIXED_LEN_BYTE_ARRAY: - return std::make_shared(metadata, std::move(pager), - encoding, properties); + return std::make_shared( + metadata, std::move(pager), use_dictionary, encoding, properties); default: ParquetException::NYI("type reader not implemented"); } @@ -721,7 +723,7 @@ inline int64_t TypedColumnWriter::WriteMiniBatch(int64_t num_values, template inline int64_t TypedColumnWriter::WriteMiniBatchSpaced( - int64_t num_values, const int16_t* def_levels, const int16_t* rep_levels, + int64_t num_levels, const int16_t* def_levels, const int16_t* rep_levels, const uint8_t* valid_bits, int64_t valid_bits_offset, const T* values, int64_t* num_spaced_written) { int64_t values_to_write = 0; @@ -733,7 +735,7 @@ inline int64_t TypedColumnWriter::WriteMiniBatchSpaced( if (descr_->schema_node()->is_optional()) { min_spaced_def_level--; } - for (int64_t i = 0; i < num_values; ++i) { + for (int64_t i = 0; i < num_levels; ++i) { if (def_levels[i] == descr_->max_definition_level()) { ++values_to_write; } @@ -742,27 +744,27 @@ inline int64_t TypedColumnWriter::WriteMiniBatchSpaced( } } - WriteDefinitionLevels(num_values, def_levels); + WriteDefinitionLevels(num_levels, def_levels); } else { // Required field, write all values - values_to_write = num_values; - spaced_values_to_write = num_values; + values_to_write = num_levels; + spaced_values_to_write = num_levels; } // Not present for non-repeated fields if (descr_->max_repetition_level() > 0) { // A row could include more than one value // Count the occasions where we start a new row - for (int64_t i = 0; i < num_values; ++i) { + for (int64_t i = 0; i < num_levels; ++i) { if (rep_levels[i] == 0) { rows_written_++; } } - WriteRepetitionLevels(num_values, rep_levels); + WriteRepetitionLevels(num_levels, rep_levels); } else { // Each value is exactly one row - rows_written_ += static_cast(num_values); + rows_written_ += static_cast(num_levels); } if (descr_->schema_node()->is_optional()) { @@ -774,10 +776,10 @@ inline int64_t TypedColumnWriter::WriteMiniBatchSpaced( if (page_statistics_ != nullptr) { page_statistics_->UpdateSpaced(values, valid_bits, valid_bits_offset, values_to_write, - num_values - values_to_write); + spaced_values_to_write - values_to_write); } - num_buffered_values_ += num_values; + num_buffered_values_ += num_levels; num_buffered_encoded_values_ += values_to_write; if (current_encoder_->EstimatedDataEncodedSize() >= properties_->data_pagesize()) { @@ -844,7 +846,8 @@ void TypedColumnWriter::WriteBatchSpaced( template void TypedColumnWriter::WriteValues(int64_t num_values, const T* values) { - current_encoder_->Put(values, static_cast(num_values)); + dynamic_cast(current_encoder_.get()) + ->Put(values, static_cast(num_values)); } template @@ -852,8 +855,8 @@ void TypedColumnWriter::WriteValuesSpaced(int64_t num_values, const uint8_t* valid_bits, int64_t valid_bits_offset, const T* values) { - current_encoder_->PutSpaced(values, static_cast(num_values), valid_bits, - valid_bits_offset); + dynamic_cast(current_encoder_.get()) + ->PutSpaced(values, static_cast(num_values), valid_bits, valid_bits_offset); } template class PARQUET_TEMPLATE_EXPORT TypedColumnWriter; diff --git a/cpp/src/parquet/column_writer.h b/cpp/src/parquet/column_writer.h index 41bc7bd3bf2fe..254bf0dd02e50 100644 --- a/cpp/src/parquet/column_writer.h +++ b/cpp/src/parquet/column_writer.h @@ -15,20 +15,20 @@ // specific language governing permissions and limitations // under the License. 
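A minimal writer-side sketch (not taken from the patch) of the factory path reworked above: ColumnWriter::Make now decides dictionary use up front and selects the dictionary index encoding, so callers only touch the factory and the typed interface. The metadata builder, page writer and properties are assumed to be prepared as in the RepeatedListsUpdateSpacedBug test, and Int32Writer is the typed alias for TypedColumnWriter over INT32.

#include <cstdint>
#include <memory>
#include <vector>

#include "parquet/column_writer.h"

void WriteThreeInts(parquet::ColumnChunkMetaDataBuilder* metadata,
                    std::unique_ptr<parquet::PageWriter> pager,
                    const parquet::WriterProperties* properties) {
  std::shared_ptr<parquet::ColumnWriter> writer =
      parquet::ColumnWriter::Make(metadata, std::move(pager), properties);
  auto typed = std::static_pointer_cast<parquet::Int32Writer>(writer);
  std::vector<int32_t> values = {1, 2, 3};
  // No def/rep levels: the column is assumed required and non-repeated.
  typed->WriteBatch(static_cast<int64_t>(values.size()), nullptr, nullptr,
                    values.data());
  typed->Close();
}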
-#ifndef PARQUET_COLUMN_WRITER_H -#define PARQUET_COLUMN_WRITER_H +#pragma once +#include #include #include +#include "arrow/memory_pool.h" + #include "parquet/column_page.h" #include "parquet/encoding.h" -#include "parquet/metadata.h" -#include "parquet/properties.h" +#include "parquet/exception.h" #include "parquet/schema.h" #include "parquet/statistics.h" #include "parquet/types.h" -#include "parquet/util/macros.h" #include "parquet/util/memory.h" #include "parquet/util/visibility.h" @@ -46,6 +46,9 @@ class RleEncoder; namespace parquet { +class ColumnChunkMetaDataBuilder; +class WriterProperties; + class PARQUET_EXPORT LevelEncoder { public: LevelEncoder(); @@ -186,7 +189,6 @@ class PARQUET_EXPORT ColumnWriter { LevelEncoder level_encoder_; ::arrow::MemoryPool* allocator_; - ChunkedAllocator pool_; // The total number of values stored in the data page. This is the maximum of // the number of encoded definition levels or encoded values. For @@ -237,8 +239,8 @@ class PARQUET_TEMPLATE_CLASS_EXPORT TypedColumnWriter : public ColumnWriter { typedef typename DType::c_type T; TypedColumnWriter(ColumnChunkMetaDataBuilder* metadata, - std::unique_ptr pager, Encoding::type encoding, - const WriterProperties* properties); + std::unique_ptr pager, const bool use_dictionary, + Encoding::type encoding, const WriterProperties* properties); // Write a batch of repetition levels, definition levels, and values to the // column. @@ -299,13 +301,13 @@ class PARQUET_TEMPLATE_CLASS_EXPORT TypedColumnWriter : public ColumnWriter { int64_t valid_bits_offset, const T* values, int64_t* num_spaced_written); - typedef Encoder EncoderType; - // Write values to a temporary buffer before they are encoded into pages void WriteValues(int64_t num_values, const T* values); void WriteValuesSpaced(int64_t num_values, const uint8_t* valid_bits, int64_t valid_bits_offset, const T* values); - std::unique_ptr current_encoder_; + + using ValueEncoderType = typename EncodingTraits::Encoder; + std::unique_ptr current_encoder_; typedef TypedRowGroupStatistics TypedStats; std::unique_ptr page_statistics_; @@ -331,5 +333,3 @@ PARQUET_EXTERN_TEMPLATE TypedColumnWriter; PARQUET_EXTERN_TEMPLATE TypedColumnWriter; } // namespace parquet - -#endif // PARQUET_COLUMN_READER_H diff --git a/cpp/src/parquet/encoding-benchmark.cc b/cpp/src/parquet/encoding-benchmark.cc index 364cdba15a252..8031aeb7ce168 100644 --- a/cpp/src/parquet/encoding-benchmark.cc +++ b/cpp/src/parquet/encoding-benchmark.cc @@ -17,7 +17,8 @@ #include "benchmark/benchmark.h" -#include "parquet/encoding-internal.h" +#include "parquet/encoding.h" +#include "parquet/schema.h" #include "parquet/util/memory.h" using arrow::default_memory_pool; @@ -27,39 +28,39 @@ namespace parquet { using schema::PrimitiveNode; -namespace benchmark { - std::shared_ptr Int64Schema(Repetition::type repetition) { auto node = PrimitiveNode::Make("int64", repetition, Type::INT64); return std::make_shared(node, repetition != Repetition::REQUIRED, repetition == Repetition::REPEATED); } -static void BM_PlainEncodingBoolean(::benchmark::State& state) { - std::vector values(state.range(0), 64); - PlainEncoder encoder(nullptr); +static void BM_PlainEncodingBoolean(benchmark::State& state) { + std::vector values(state.range(0), true); + auto encoder = MakeEncoder(Type::BOOLEAN, Encoding::PLAIN); + auto typed_encoder = dynamic_cast(encoder.get()); while (state.KeepRunning()) { - encoder.Put(values, static_cast(values.size())); - encoder.FlushValues(); + typed_encoder->Put(values, 
static_cast(values.size())); + typed_encoder->FlushValues(); } state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(bool)); } BENCHMARK(BM_PlainEncodingBoolean)->Range(1024, 65536); -static void BM_PlainDecodingBoolean(::benchmark::State& state) { - std::vector values(state.range(0), 64); +static void BM_PlainDecodingBoolean(benchmark::State& state) { + std::vector values(state.range(0), true); bool* output = new bool[state.range(0)]; - PlainEncoder encoder(nullptr); - encoder.Put(values, static_cast(values.size())); - std::shared_ptr buf = encoder.FlushValues(); + auto encoder = MakeEncoder(Type::BOOLEAN, Encoding::PLAIN); + auto typed_encoder = dynamic_cast(encoder.get()); + typed_encoder->Put(values, static_cast(values.size())); + std::shared_ptr buf = encoder->FlushValues(); while (state.KeepRunning()) { - PlainDecoder decoder(nullptr); - decoder.SetData(static_cast(values.size()), buf->data(), - static_cast(buf->size())); - decoder.Decode(output, static_cast(values.size())); + auto decoder = MakeTypedDecoder(Encoding::PLAIN); + decoder->SetData(static_cast(values.size()), buf->data(), + static_cast(buf->size())); + decoder->Decode(output, static_cast(values.size())); } state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(bool)); @@ -68,30 +69,29 @@ static void BM_PlainDecodingBoolean(::benchmark::State& state) { BENCHMARK(BM_PlainDecodingBoolean)->Range(1024, 65536); -static void BM_PlainEncodingInt64(::benchmark::State& state) { +static void BM_PlainEncodingInt64(benchmark::State& state) { std::vector values(state.range(0), 64); - PlainEncoder encoder(nullptr); - + auto encoder = MakeTypedEncoder(Encoding::PLAIN); while (state.KeepRunning()) { - encoder.Put(values.data(), static_cast(values.size())); - encoder.FlushValues(); + encoder->Put(values.data(), static_cast(values.size())); + encoder->FlushValues(); } state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(int64_t)); } BENCHMARK(BM_PlainEncodingInt64)->Range(1024, 65536); -static void BM_PlainDecodingInt64(::benchmark::State& state) { +static void BM_PlainDecodingInt64(benchmark::State& state) { std::vector values(state.range(0), 64); - PlainEncoder encoder(nullptr); - encoder.Put(values.data(), static_cast(values.size())); - std::shared_ptr buf = encoder.FlushValues(); + auto encoder = MakeTypedEncoder(Encoding::PLAIN); + encoder->Put(values.data(), static_cast(values.size())); + std::shared_ptr buf = encoder->FlushValues(); while (state.KeepRunning()) { - PlainDecoder decoder(nullptr); - decoder.SetData(static_cast(values.size()), buf->data(), - static_cast(buf->size())); - decoder.Decode(values.data(), static_cast(values.size())); + auto decoder = MakeTypedDecoder(Encoding::PLAIN); + decoder->SetData(static_cast(values.size()), buf->data(), + static_cast(buf->size())); + decoder->Decode(values.data(), static_cast(values.size())); } state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(int64_t)); } @@ -100,45 +100,47 @@ BENCHMARK(BM_PlainDecodingInt64)->Range(1024, 65536); template static void DecodeDict(std::vector& values, - ::benchmark::State& state) { + benchmark::State& state) { typedef typename Type::c_type T; int num_values = static_cast(values.size()); - ChunkedAllocator pool; MemoryPool* allocator = default_memory_pool(); std::shared_ptr descr = Int64Schema(Repetition::REQUIRED); - DictEncoder encoder(descr.get(), &pool, allocator); - for (int i = 0; i < num_values; ++i) { - encoder.Put(values[i]); - } + auto base_encoder = + MakeEncoder(Type::type_num, 
Encoding::PLAIN, true, descr.get(), allocator); + auto encoder = + dynamic_cast::Encoder*>(base_encoder.get()); + auto dict_traits = dynamic_cast*>(base_encoder.get()); + encoder->Put(values.data(), num_values); std::shared_ptr dict_buffer = - AllocateBuffer(allocator, encoder.dict_encoded_size()); + AllocateBuffer(allocator, dict_traits->dict_encoded_size()); std::shared_ptr indices = - AllocateBuffer(allocator, encoder.EstimatedDataEncodedSize()); + AllocateBuffer(allocator, encoder->EstimatedDataEncodedSize()); - encoder.WriteDict(dict_buffer->mutable_data()); - int actual_bytes = - encoder.WriteIndices(indices->mutable_data(), static_cast(indices->size())); + dict_traits->WriteDict(dict_buffer->mutable_data()); + int actual_bytes = dict_traits->WriteIndices(indices->mutable_data(), + static_cast(indices->size())); PARQUET_THROW_NOT_OK(indices->Resize(actual_bytes)); while (state.KeepRunning()) { - PlainDecoder dict_decoder(descr.get()); - dict_decoder.SetData(encoder.num_entries(), dict_buffer->data(), - static_cast(dict_buffer->size())); - DictionaryDecoder decoder(descr.get()); - decoder.SetDict(&dict_decoder); - decoder.SetData(num_values, indices->data(), static_cast(indices->size())); - decoder.Decode(values.data(), num_values); + auto dict_decoder = MakeTypedDecoder(Encoding::PLAIN, descr.get()); + dict_decoder->SetData(dict_traits->num_entries(), dict_buffer->data(), + static_cast(dict_buffer->size())); + + auto decoder = MakeDictDecoder(descr.get()); + decoder->SetDict(dict_decoder.get()); + decoder->SetData(num_values, indices->data(), static_cast(indices->size())); + decoder->Decode(values.data(), num_values); } state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(T)); } -static void BM_DictDecodingInt64_repeats(::benchmark::State& state) { +static void BM_DictDecodingInt64_repeats(benchmark::State& state) { typedef Int64Type Type; typedef typename Type::c_type T; @@ -148,7 +150,7 @@ static void BM_DictDecodingInt64_repeats(::benchmark::State& state) { BENCHMARK(BM_DictDecodingInt64_repeats)->Range(1024, 65536); -static void BM_DictDecodingInt64_literals(::benchmark::State& state) { +static void BM_DictDecodingInt64_literals(benchmark::State& state) { typedef Int64Type Type; typedef typename Type::c_type T; @@ -161,6 +163,4 @@ static void BM_DictDecodingInt64_literals(::benchmark::State& state) { BENCHMARK(BM_DictDecodingInt64_literals)->Range(1024, 65536); -} // namespace benchmark - } // namespace parquet diff --git a/cpp/src/parquet/encoding-internal.h b/cpp/src/parquet/encoding-internal.h deleted file mode 100644 index b06ad41cc52c2..0000000000000 --- a/cpp/src/parquet/encoding-internal.h +++ /dev/null @@ -1,857 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
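The rewritten benchmark above exercises the factory functions that replace direct construction of the PlainEncoder/PlainDecoder classes from the deleted encoding-internal.h. A minimal PLAIN round trip under those factories (a sketch, not taken from the patch):

#include <cstdint>
#include <memory>
#include <vector>

#include "arrow/buffer.h"
#include "parquet/encoding.h"

std::vector<int64_t> RoundTripPlainInt64(const std::vector<int64_t>& values) {
  // Encode with the typed factory; FlushValues hands back the encoded buffer.
  auto encoder =
      parquet::MakeTypedEncoder<parquet::Int64Type>(parquet::Encoding::PLAIN);
  encoder->Put(values.data(), static_cast<int>(values.size()));
  std::shared_ptr<arrow::Buffer> buf = encoder->FlushValues();

  // Decode the buffer back through the matching typed decoder.
  auto decoder =
      parquet::MakeTypedDecoder<parquet::Int64Type>(parquet::Encoding::PLAIN);
  decoder->SetData(static_cast<int>(values.size()), buf->data(),
                   static_cast<int>(buf->size()));
  std::vector<int64_t> out(values.size());
  decoder->Decode(out.data(), static_cast<int>(out.size()));
  return out;
}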
- -#ifndef PARQUET_ENCODING_INTERNAL_H -#define PARQUET_ENCODING_INTERNAL_H - -#include -#include -#include -#include -#include - -#include "arrow/util/bit-stream-utils.h" -#include "arrow/util/bit-util.h" -#include "arrow/util/hashing.h" -#include "arrow/util/macros.h" -#include "arrow/util/rle-encoding.h" - -#include "parquet/encoding.h" -#include "parquet/exception.h" -#include "parquet/schema.h" -#include "parquet/types.h" -#include "parquet/util/memory.h" - -namespace parquet { - -namespace BitUtil = ::arrow::BitUtil; - -class ColumnDescriptor; - -// ---------------------------------------------------------------------- -// Encoding::PLAIN decoder implementation - -template -class PlainDecoder : public Decoder { - public: - typedef typename DType::c_type T; - using Decoder::num_values_; - - explicit PlainDecoder(const ColumnDescriptor* descr) - : Decoder(descr, Encoding::PLAIN), data_(nullptr), len_(0) { - if (descr_ && descr_->physical_type() == Type::FIXED_LEN_BYTE_ARRAY) { - type_length_ = descr_->type_length(); - } else { - type_length_ = -1; - } - } - - virtual void SetData(int num_values, const uint8_t* data, int len) { - num_values_ = num_values; - data_ = data; - len_ = len; - } - - virtual int Decode(T* buffer, int max_values); - - private: - using Decoder::descr_; - const uint8_t* data_; - int len_; - int type_length_; -}; - -// Decode routine templated on C++ type rather than type enum -template -inline int DecodePlain(const uint8_t* data, int64_t data_size, int num_values, - int type_length, T* out) { - int bytes_to_decode = num_values * static_cast(sizeof(T)); - if (data_size < bytes_to_decode) { - ParquetException::EofException(); - } - memcpy(out, data, bytes_to_decode); - return bytes_to_decode; -} - -// Template specialization for BYTE_ARRAY. The written values do not own their -// own data. -template <> -inline int DecodePlain(const uint8_t* data, int64_t data_size, int num_values, - int type_length, ByteArray* out) { - int bytes_decoded = 0; - int increment; - for (int i = 0; i < num_values; ++i) { - uint32_t len = out[i].len = *reinterpret_cast(data); - increment = static_cast(sizeof(uint32_t) + len); - if (data_size < increment) ParquetException::EofException(); - out[i].ptr = data + sizeof(uint32_t); - data += increment; - data_size -= increment; - bytes_decoded += increment; - } - return bytes_decoded; -} - -// Template specialization for FIXED_LEN_BYTE_ARRAY. The written values do not -// own their own data. 
-template <> -inline int DecodePlain(const uint8_t* data, int64_t data_size, - int num_values, int type_length, - FixedLenByteArray* out) { - int bytes_to_decode = type_length * num_values; - if (data_size < bytes_to_decode) { - ParquetException::EofException(); - } - for (int i = 0; i < num_values; ++i) { - out[i].ptr = data; - data += type_length; - data_size -= type_length; - } - return bytes_to_decode; -} - -template -inline int PlainDecoder::Decode(T* buffer, int max_values) { - max_values = std::min(max_values, num_values_); - int bytes_consumed = DecodePlain(data_, len_, max_values, type_length_, buffer); - data_ += bytes_consumed; - len_ -= bytes_consumed; - num_values_ -= max_values; - return max_values; -} - -template <> -class PlainDecoder : public Decoder { - public: - explicit PlainDecoder(const ColumnDescriptor* descr) - : Decoder(descr, Encoding::PLAIN) {} - - virtual void SetData(int num_values, const uint8_t* data, int len) { - num_values_ = num_values; - bit_reader_ = BitUtil::BitReader(data, len); - } - - // Two flavors of bool decoding - int Decode(uint8_t* buffer, int max_values) { - max_values = std::min(max_values, num_values_); - bool val; - ::arrow::internal::BitmapWriter bit_writer(buffer, 0, max_values); - for (int i = 0; i < max_values; ++i) { - if (!bit_reader_.GetValue(1, &val)) { - ParquetException::EofException(); - } - if (val) { - bit_writer.Set(); - } - bit_writer.Next(); - } - bit_writer.Finish(); - num_values_ -= max_values; - return max_values; - } - - virtual int Decode(bool* buffer, int max_values) { - max_values = std::min(max_values, num_values_); - if (bit_reader_.GetBatch(1, buffer, max_values) != max_values) { - ParquetException::EofException(); - } - num_values_ -= max_values; - return max_values; - } - - private: - BitUtil::BitReader bit_reader_; -}; - -// ---------------------------------------------------------------------- -// Encoding::PLAIN encoder implementation - -template -class PlainEncoder : public Encoder { - public: - typedef typename DType::c_type T; - - explicit PlainEncoder(const ColumnDescriptor* descr, - ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) - : Encoder(descr, Encoding::PLAIN, pool) { - values_sink_.reset(new InMemoryOutputStream(pool)); - } - - int64_t EstimatedDataEncodedSize() override { return values_sink_->Tell(); } - - std::shared_ptr FlushValues() override; - void Put(const T* src, int num_values) override; - - protected: - std::unique_ptr values_sink_; -}; - -template <> -class PlainEncoder : public Encoder { - public: - explicit PlainEncoder(const ColumnDescriptor* descr, - ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) - : Encoder(descr, Encoding::PLAIN, pool), - bits_available_(kInMemoryDefaultCapacity * 8), - bits_buffer_(AllocateBuffer(pool, kInMemoryDefaultCapacity)), - values_sink_(new InMemoryOutputStream(pool)) { - bit_writer_.reset(new BitUtil::BitWriter(bits_buffer_->mutable_data(), - static_cast(bits_buffer_->size()))); - } - - int64_t EstimatedDataEncodedSize() override { - return values_sink_->Tell() + bit_writer_->bytes_written(); - } - - std::shared_ptr FlushValues() override { - if (bits_available_ > 0) { - bit_writer_->Flush(); - values_sink_->Write(bit_writer_->buffer(), bit_writer_->bytes_written()); - bit_writer_->Clear(); - bits_available_ = static_cast(bits_buffer_->size()) * 8; - } - - std::shared_ptr buffer = values_sink_->GetBuffer(); - values_sink_.reset(new InMemoryOutputStream(this->pool_)); - return buffer; - } - -#define 
PLAINDECODER_BOOLEAN_PUT(input_type, function_attributes) \ - void Put(input_type src, int num_values) function_attributes { \ - int bit_offset = 0; \ - if (bits_available_ > 0) { \ - int bits_to_write = std::min(bits_available_, num_values); \ - for (int i = 0; i < bits_to_write; i++) { \ - bit_writer_->PutValue(src[i], 1); \ - } \ - bits_available_ -= bits_to_write; \ - bit_offset = bits_to_write; \ - \ - if (bits_available_ == 0) { \ - bit_writer_->Flush(); \ - values_sink_->Write(bit_writer_->buffer(), bit_writer_->bytes_written()); \ - bit_writer_->Clear(); \ - } \ - } \ - \ - int bits_remaining = num_values - bit_offset; \ - while (bit_offset < num_values) { \ - bits_available_ = static_cast(bits_buffer_->size()) * 8; \ - \ - int bits_to_write = std::min(bits_available_, bits_remaining); \ - for (int i = bit_offset; i < bit_offset + bits_to_write; i++) { \ - bit_writer_->PutValue(src[i], 1); \ - } \ - bit_offset += bits_to_write; \ - bits_available_ -= bits_to_write; \ - bits_remaining -= bits_to_write; \ - \ - if (bits_available_ == 0) { \ - bit_writer_->Flush(); \ - values_sink_->Write(bit_writer_->buffer(), bit_writer_->bytes_written()); \ - bit_writer_->Clear(); \ - } \ - } \ - } - - PLAINDECODER_BOOLEAN_PUT(const bool*, override) - PLAINDECODER_BOOLEAN_PUT(const std::vector&, ) - - protected: - int bits_available_; - std::unique_ptr bit_writer_; - std::shared_ptr bits_buffer_; - std::unique_ptr values_sink_; -}; - -template -inline std::shared_ptr PlainEncoder::FlushValues() { - std::shared_ptr buffer = values_sink_->GetBuffer(); - values_sink_.reset(new InMemoryOutputStream(this->pool_)); - return buffer; -} - -template -inline void PlainEncoder::Put(const T* buffer, int num_values) { - values_sink_->Write(reinterpret_cast(buffer), num_values * sizeof(T)); -} - -template <> -inline void PlainEncoder::Put(const ByteArray* src, int num_values) { - for (int i = 0; i < num_values; ++i) { - // Write the result to the output stream - values_sink_->Write(reinterpret_cast(&src[i].len), sizeof(uint32_t)); - if (src[i].len > 0) { - DCHECK(nullptr != src[i].ptr) << "Value ptr cannot be NULL"; - } - values_sink_->Write(reinterpret_cast(src[i].ptr), src[i].len); - } -} - -template <> -inline void PlainEncoder::Put(const FixedLenByteArray* src, int num_values) { - for (int i = 0; i < num_values; ++i) { - // Write the result to the output stream - if (descr_->type_length() > 0) { - DCHECK(nullptr != src[i].ptr) << "Value ptr cannot be NULL"; - } - values_sink_->Write(reinterpret_cast(src[i].ptr), - descr_->type_length()); - } -} - -// ---------------------------------------------------------------------- -// Dictionary encoding and decoding - -template -class DictionaryDecoder : public Decoder { - public: - typedef typename Type::c_type T; - - // Initializes the dictionary with values from 'dictionary'. The data in - // dictionary is not guaranteed to persist in memory after this call so the - // dictionary decoder needs to copy the data out if necessary. 
- explicit DictionaryDecoder(const ColumnDescriptor* descr, - ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) - : Decoder(descr, Encoding::RLE_DICTIONARY), - dictionary_(0, pool), - byte_array_data_(AllocateBuffer(pool, 0)) {} - - // Perform type-specific initiatialization - void SetDict(Decoder* dictionary); - - void SetData(int num_values, const uint8_t* data, int len) override { - num_values_ = num_values; - if (len == 0) return; - uint8_t bit_width = *data; - ++data; - --len; - idx_decoder_ = ::arrow::util::RleDecoder(data, len, bit_width); - } - - int Decode(T* buffer, int max_values) override { - max_values = std::min(max_values, num_values_); - int decoded_values = - idx_decoder_.GetBatchWithDict(dictionary_.data(), buffer, max_values); - if (decoded_values != max_values) { - ParquetException::EofException(); - } - num_values_ -= max_values; - return max_values; - } - - int DecodeSpaced(T* buffer, int num_values, int null_count, const uint8_t* valid_bits, - int64_t valid_bits_offset) override { - int decoded_values = - idx_decoder_.GetBatchWithDictSpaced(dictionary_.data(), buffer, num_values, - null_count, valid_bits, valid_bits_offset); - if (decoded_values != num_values) { - ParquetException::EofException(); - } - return decoded_values; - } - - private: - using Decoder::num_values_; - - // Only one is set. - Vector dictionary_; - - // Data that contains the byte array data (byte_array_dictionary_ just has the - // pointers). - std::shared_ptr byte_array_data_; - - ::arrow::util::RleDecoder idx_decoder_; -}; - -template -inline void DictionaryDecoder::SetDict(Decoder* dictionary) { - int num_dictionary_values = dictionary->values_left(); - dictionary_.Resize(num_dictionary_values); - dictionary->Decode(&dictionary_[0], num_dictionary_values); -} - -template <> -inline void DictionaryDecoder::SetDict(Decoder* dictionary) { - ParquetException::NYI("Dictionary encoding is not implemented for boolean values"); -} - -template <> -inline void DictionaryDecoder::SetDict( - Decoder* dictionary) { - int num_dictionary_values = dictionary->values_left(); - dictionary_.Resize(num_dictionary_values); - dictionary->Decode(&dictionary_[0], num_dictionary_values); - - int total_size = 0; - for (int i = 0; i < num_dictionary_values; ++i) { - total_size += dictionary_[i].len; - } - if (total_size > 0) { - PARQUET_THROW_NOT_OK(byte_array_data_->Resize(total_size, false)); - } - - int offset = 0; - uint8_t* bytes_data = byte_array_data_->mutable_data(); - for (int i = 0; i < num_dictionary_values; ++i) { - memcpy(bytes_data + offset, dictionary_[i].ptr, dictionary_[i].len); - dictionary_[i].ptr = bytes_data + offset; - offset += dictionary_[i].len; - } -} - -template <> -inline void DictionaryDecoder::SetDict(Decoder* dictionary) { - int num_dictionary_values = dictionary->values_left(); - dictionary_.Resize(num_dictionary_values); - dictionary->Decode(&dictionary_[0], num_dictionary_values); - - int fixed_len = descr_->type_length(); - int total_size = num_dictionary_values * fixed_len; - - PARQUET_THROW_NOT_OK(byte_array_data_->Resize(total_size, false)); - uint8_t* bytes_data = byte_array_data_->mutable_data(); - for (int32_t i = 0, offset = 0; i < num_dictionary_values; ++i, offset += fixed_len) { - memcpy(bytes_data + offset, dictionary_[i].ptr, fixed_len); - dictionary_[i].ptr = bytes_data + offset; - } -} - -// ---------------------------------------------------------------------- -// Dictionary encoder - -template -struct DictEncoderTraits { - using c_type = typename 
DType::c_type; - using MemoTableType = ::arrow::internal::ScalarMemoTable; -}; - -template <> -struct DictEncoderTraits { - using MemoTableType = ::arrow::internal::BinaryMemoTable; -}; - -template <> -struct DictEncoderTraits { - using MemoTableType = ::arrow::internal::BinaryMemoTable; -}; - -// Initially 1024 elements -static constexpr int32_t INITIAL_HASH_TABLE_SIZE = 1 << 10; - -/// See the dictionary encoding section of https://github.com/Parquet/parquet-format. -/// The encoding supports streaming encoding. Values are encoded as they are added while -/// the dictionary is being constructed. At any time, the buffered values can be -/// written out with the current dictionary size. More values can then be added to -/// the encoder, including new dictionary entries. -template -class DictEncoder : public Encoder { - using MemoTableType = typename DictEncoderTraits::MemoTableType; - - public: - typedef typename DType::c_type T; - - // XXX pool is unused - explicit DictEncoder(const ColumnDescriptor* desc, ChunkedAllocator* pool = nullptr, - ::arrow::MemoryPool* allocator = ::arrow::default_memory_pool()) - : Encoder(desc, Encoding::PLAIN_DICTIONARY, allocator), - allocator_(allocator), - pool_(pool), - dict_encoded_size_(0), - type_length_(desc->type_length()), - memo_table_(INITIAL_HASH_TABLE_SIZE) {} - - ~DictEncoder() override { DCHECK(buffered_indices_.empty()); } - - void set_type_length(int type_length) { type_length_ = type_length; } - - /// Returns a conservative estimate of the number of bytes needed to encode the buffered - /// indices. Used to size the buffer passed to WriteIndices(). - int64_t EstimatedDataEncodedSize() override { - // Note: because of the way RleEncoder::CheckBufferFull() is called, we have to - // reserve - // an extra "RleEncoder::MinBufferSize" bytes. These extra bytes won't be used - // but not reserving them would cause the encoder to fail. - return 1 + - ::arrow::util::RleEncoder::MaxBufferSize( - bit_width(), static_cast(buffered_indices_.size())) + - ::arrow::util::RleEncoder::MinBufferSize(bit_width()); - } - - /// The minimum bit width required to encode the currently buffered indices. - int bit_width() const { - if (ARROW_PREDICT_FALSE(num_entries() == 0)) return 0; - if (ARROW_PREDICT_FALSE(num_entries() == 1)) return 1; - return BitUtil::Log2(num_entries()); - } - - /// Writes out any buffered indices to buffer preceded by the bit width of this data. - /// Returns the number of bytes written. - /// If the supplied buffer is not big enough, returns -1. - /// buffer must be preallocated with buffer_len bytes. Use EstimatedDataEncodedSize() - /// to size buffer. - int WriteIndices(uint8_t* buffer, int buffer_len); - - int dict_encoded_size() { return dict_encoded_size_; } - - /// Encode value. Note that this does not actually write any data, just - /// buffers the value's index to be written later. 
- inline void Put(const T& value); - void Put(const T* values, int num_values) override; - - std::shared_ptr FlushValues() override { - std::shared_ptr buffer = - AllocateBuffer(this->allocator_, EstimatedDataEncodedSize()); - int result_size = WriteIndices(buffer->mutable_data(), - static_cast(EstimatedDataEncodedSize())); - PARQUET_THROW_NOT_OK(buffer->Resize(result_size, false)); - return buffer; - } - - void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits, - int64_t valid_bits_offset) override { - ::arrow::internal::BitmapReader valid_bits_reader(valid_bits, valid_bits_offset, - num_values); - for (int32_t i = 0; i < num_values; i++) { - if (valid_bits_reader.IsSet()) { - Put(src[i]); - } - valid_bits_reader.Next(); - } - } - - /// Writes out the encoded dictionary to buffer. buffer must be preallocated to - /// dict_encoded_size() bytes. - void WriteDict(uint8_t* buffer); - - ChunkedAllocator* mem_pool() { return pool_; } - - /// The number of entries in the dictionary. - int num_entries() const { return memo_table_.size(); } - - private: - /// Clears all the indices (but leaves the dictionary). - void ClearIndices() { buffered_indices_.clear(); } - - ::arrow::MemoryPool* allocator_; - - // For ByteArray / FixedLenByteArray data. Not owned - ChunkedAllocator* pool_; - - /// Indices that have not yet be written out by WriteIndices(). - std::vector buffered_indices_; - - /// The number of bytes needed to encode the dictionary. - int dict_encoded_size_; - - /// Size of each encoded dictionary value. -1 for variable-length types. - int type_length_; - - MemoTableType memo_table_; -}; - -template -void DictEncoder::Put(const T* src, int num_values) { - for (int32_t i = 0; i < num_values; i++) { - Put(src[i]); - } -} - -template -inline void DictEncoder::Put(const T& v) { - // Put() implementation for primitive types - auto on_found = [](int32_t memo_index) {}; - auto on_not_found = [this](int32_t memo_index) { - dict_encoded_size_ += static_cast(sizeof(T)); - }; - - auto memo_index = memo_table_.GetOrInsert(v, on_found, on_not_found); - buffered_indices_.push_back(memo_index); -} - -template <> -inline void DictEncoder::Put(const ByteArray& v) { - static const uint8_t empty[] = {0}; - - auto on_found = [](int32_t memo_index) {}; - auto on_not_found = [&](int32_t memo_index) { - dict_encoded_size_ += static_cast(v.len + sizeof(uint32_t)); - }; - - DCHECK(v.ptr != nullptr || v.len == 0); - const void* ptr = (v.ptr != nullptr) ? v.ptr : empty; - auto memo_index = - memo_table_.GetOrInsert(ptr, static_cast(v.len), on_found, on_not_found); - buffered_indices_.push_back(memo_index); -} - -template <> -inline void DictEncoder::Put(const FixedLenByteArray& v) { - static const uint8_t empty[] = {0}; - - auto on_found = [](int32_t memo_index) {}; - auto on_not_found = [this](int32_t memo_index) { dict_encoded_size_ += type_length_; }; - - DCHECK(v.ptr != nullptr || type_length_ == 0); - const void* ptr = (v.ptr != nullptr) ? 
v.ptr : empty; - auto memo_index = memo_table_.GetOrInsert(ptr, type_length_, on_found, on_not_found); - buffered_indices_.push_back(memo_index); -} - -template -inline void DictEncoder::WriteDict(uint8_t* buffer) { - // For primitive types, only a memcpy - DCHECK_EQ(static_cast(dict_encoded_size_), sizeof(T) * memo_table_.size()); - memo_table_.CopyValues(0 /* start_pos */, reinterpret_cast(buffer)); -} - -// ByteArray and FLBA already have the dictionary encoded in their data heaps -template <> -inline void DictEncoder::WriteDict(uint8_t* buffer) { - memo_table_.VisitValues(0, [&](const ::arrow::util::string_view& v) { - uint32_t len = static_cast(v.length()); - memcpy(buffer, &len, sizeof(uint32_t)); - buffer += sizeof(uint32_t); - memcpy(buffer, v.data(), v.length()); - buffer += v.length(); - }); -} - -template <> -inline void DictEncoder::WriteDict(uint8_t* buffer) { - memo_table_.VisitValues(0, [&](const ::arrow::util::string_view& v) { - DCHECK_EQ(v.length(), static_cast(type_length_)); - memcpy(buffer, v.data(), type_length_); - buffer += type_length_; - }); -} - -template -inline int DictEncoder::WriteIndices(uint8_t* buffer, int buffer_len) { - // Write bit width in first byte - *buffer = static_cast(bit_width()); - ++buffer; - --buffer_len; - - ::arrow::util::RleEncoder encoder(buffer, buffer_len, bit_width()); - for (int index : buffered_indices_) { - if (!encoder.Put(index)) return -1; - } - encoder.Flush(); - - ClearIndices(); - return 1 + encoder.len(); -} - -// ---------------------------------------------------------------------- -// DeltaBitPackDecoder - -template -class DeltaBitPackDecoder : public Decoder { - public: - typedef typename DType::c_type T; - - explicit DeltaBitPackDecoder(const ColumnDescriptor* descr, - ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) - : Decoder(descr, Encoding::DELTA_BINARY_PACKED), pool_(pool) { - if (DType::type_num != Type::INT32 && DType::type_num != Type::INT64) { - throw ParquetException("Delta bit pack encoding should only be for integer data."); - } - } - - virtual void SetData(int num_values, const uint8_t* data, int len) { - num_values_ = num_values; - decoder_ = BitUtil::BitReader(data, len); - values_current_block_ = 0; - values_current_mini_block_ = 0; - } - - virtual int Decode(T* buffer, int max_values) { - return GetInternal(buffer, max_values); - } - - private: - using Decoder::num_values_; - - void InitBlock() { - int32_t block_size; - if (!decoder_.GetVlqInt(&block_size)) ParquetException::EofException(); - if (!decoder_.GetVlqInt(&num_mini_blocks_)) ParquetException::EofException(); - if (!decoder_.GetVlqInt(&values_current_block_)) { - ParquetException::EofException(); - } - if (!decoder_.GetZigZagVlqInt(&last_value_)) ParquetException::EofException(); - - delta_bit_widths_ = AllocateBuffer(pool_, num_mini_blocks_); - uint8_t* bit_width_data = delta_bit_widths_->mutable_data(); - - if (!decoder_.GetZigZagVlqInt(&min_delta_)) ParquetException::EofException(); - for (int i = 0; i < num_mini_blocks_; ++i) { - if (!decoder_.GetAligned(1, bit_width_data + i)) { - ParquetException::EofException(); - } - } - values_per_mini_block_ = block_size / num_mini_blocks_; - mini_block_idx_ = 0; - delta_bit_width_ = bit_width_data[0]; - values_current_mini_block_ = values_per_mini_block_; - } - - template - int GetInternal(T* buffer, int max_values) { - max_values = std::min(max_values, num_values_); - const uint8_t* bit_width_data = delta_bit_widths_->data(); - for (int i = 0; i < max_values; ++i) { - if 
(ARROW_PREDICT_FALSE(values_current_mini_block_ == 0)) { - ++mini_block_idx_; - if (mini_block_idx_ < static_cast(delta_bit_widths_->size())) { - delta_bit_width_ = bit_width_data[mini_block_idx_]; - values_current_mini_block_ = values_per_mini_block_; - } else { - InitBlock(); - buffer[i] = last_value_; - continue; - } - } - - // TODO: the key to this algorithm is to decode the entire miniblock at once. - int64_t delta; - if (!decoder_.GetValue(delta_bit_width_, &delta)) ParquetException::EofException(); - delta += min_delta_; - last_value_ += static_cast(delta); - buffer[i] = last_value_; - --values_current_mini_block_; - } - num_values_ -= max_values; - return max_values; - } - - ::arrow::MemoryPool* pool_; - BitUtil::BitReader decoder_; - int32_t values_current_block_; - int32_t num_mini_blocks_; - uint64_t values_per_mini_block_; - uint64_t values_current_mini_block_; - - int32_t min_delta_; - size_t mini_block_idx_; - std::shared_ptr delta_bit_widths_; - int delta_bit_width_; - - int32_t last_value_; -}; - -// ---------------------------------------------------------------------- -// DELTA_LENGTH_BYTE_ARRAY - -class DeltaLengthByteArrayDecoder : public Decoder { - public: - explicit DeltaLengthByteArrayDecoder( - const ColumnDescriptor* descr, - ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) - : Decoder(descr, Encoding::DELTA_LENGTH_BYTE_ARRAY), - len_decoder_(nullptr, pool) {} - - virtual void SetData(int num_values, const uint8_t* data, int len) { - num_values_ = num_values; - if (len == 0) return; - int total_lengths_len = *reinterpret_cast(data); - data += 4; - len_decoder_.SetData(num_values, data, total_lengths_len); - data_ = data + total_lengths_len; - len_ = len - 4 - total_lengths_len; - } - - virtual int Decode(ByteArray* buffer, int max_values) { - max_values = std::min(max_values, num_values_); - std::vector lengths(max_values); - len_decoder_.Decode(lengths.data(), max_values); - for (int i = 0; i < max_values; ++i) { - buffer[i].len = lengths[i]; - buffer[i].ptr = data_; - data_ += lengths[i]; - len_ -= lengths[i]; - } - num_values_ -= max_values; - return max_values; - } - - private: - using Decoder::num_values_; - DeltaBitPackDecoder len_decoder_; - const uint8_t* data_; - int len_; -}; - -// ---------------------------------------------------------------------- -// DELTA_BYTE_ARRAY - -class DeltaByteArrayDecoder : public Decoder { - public: - explicit DeltaByteArrayDecoder( - const ColumnDescriptor* descr, - ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) - : Decoder(descr, Encoding::DELTA_BYTE_ARRAY), - prefix_len_decoder_(nullptr, pool), - suffix_decoder_(nullptr, pool), - last_value_(0, nullptr) {} - - virtual void SetData(int num_values, const uint8_t* data, int len) { - num_values_ = num_values; - if (len == 0) return; - int prefix_len_length = *reinterpret_cast(data); - data += 4; - len -= 4; - prefix_len_decoder_.SetData(num_values, data, prefix_len_length); - data += prefix_len_length; - len -= prefix_len_length; - suffix_decoder_.SetData(num_values, data, len); - } - - // TODO: this doesn't work and requires memory management. We need to allocate - // new strings to store the results. 
- virtual int Decode(ByteArray* buffer, int max_values) { - max_values = std::min(max_values, num_values_); - for (int i = 0; i < max_values; ++i) { - int prefix_len = 0; - prefix_len_decoder_.Decode(&prefix_len, 1); - ByteArray suffix = {0, nullptr}; - suffix_decoder_.Decode(&suffix, 1); - buffer[i].len = prefix_len + suffix.len; - - uint8_t* result = reinterpret_cast(malloc(buffer[i].len)); - memcpy(result, last_value_.ptr, prefix_len); - memcpy(result + prefix_len, suffix.ptr, suffix.len); - - buffer[i].ptr = result; - last_value_ = buffer[i]; - } - num_values_ -= max_values; - return max_values; - } - - private: - using Decoder::num_values_; - - DeltaBitPackDecoder prefix_len_decoder_; - DeltaLengthByteArrayDecoder suffix_decoder_; - ByteArray last_value_; -}; - -} // namespace parquet - -#endif // PARQUET_ENCODING_INTERNAL_H diff --git a/cpp/src/parquet/encoding-test.cc b/cpp/src/parquet/encoding-test.cc index 50e1394c629d0..28d98126ec84a 100644 --- a/cpp/src/parquet/encoding-test.cc +++ b/cpp/src/parquet/encoding-test.cc @@ -24,7 +24,7 @@ #include "arrow/util/bit-util.h" -#include "parquet/encoding-internal.h" +#include "parquet/encoding.h" #include "parquet/schema.h" #include "parquet/types.h" #include "parquet/util/memory.h" @@ -43,29 +43,31 @@ namespace test { TEST(VectorBooleanTest, TestEncodeDecode) { // PARQUET-454 int nvalues = 10000; - int nbytes = static_cast(BitUtil::BytesForBits(nvalues)); + int nbytes = static_cast(::arrow::BitUtil::BytesForBits(nvalues)); // seed the prng so failure is deterministic vector draws = flip_coins_seed(nvalues, 0.5, 0); - PlainEncoder encoder(nullptr); - PlainDecoder decoder(nullptr); + std::unique_ptr encoder = + MakeTypedEncoder(Encoding::PLAIN); + encoder->Put(draws, nvalues); - encoder.Put(draws, nvalues); + std::unique_ptr decoder = + MakeTypedDecoder(Encoding::PLAIN); - std::shared_ptr encode_buffer = encoder.FlushValues(); + std::shared_ptr encode_buffer = encoder->FlushValues(); ASSERT_EQ(nbytes, encode_buffer->size()); vector decode_buffer(nbytes); const uint8_t* decode_data = &decode_buffer[0]; - decoder.SetData(nvalues, encode_buffer->data(), - static_cast(encode_buffer->size())); - int values_decoded = decoder.Decode(&decode_buffer[0], nvalues); + decoder->SetData(nvalues, encode_buffer->data(), + static_cast(encode_buffer->size())); + int values_decoded = decoder->Decode(&decode_buffer[0], nvalues); ASSERT_EQ(nvalues, values_decoded); for (int i = 0; i < nvalues; ++i) { - ASSERT_EQ(draws[i], BitUtil::GetBit(decode_data, i)) << i; + ASSERT_EQ(draws[i], ::arrow::BitUtil::GetBit(decode_data, i)) << i; } } @@ -155,7 +157,7 @@ class TestEncodingBase : public ::testing::Test { allocator_ = default_memory_pool(); } - void TearDown() { pool_.FreeAll(); } + void TearDown() {} void InitData(int nvalues, int repeats) { num_values_ = nvalues * repeats; @@ -181,7 +183,6 @@ class TestEncodingBase : public ::testing::Test { } protected: - ChunkedAllocator pool_; MemoryPool* allocator_; int num_values_; @@ -199,7 +200,6 @@ class TestEncodingBase : public ::testing::Test { // Member variables are not visible to templated subclasses. 
Possibly figure // out an alternative to this class layering at some point #define USING_BASE_MEMBERS() \ - using TestEncodingBase::pool_; \ using TestEncodingBase::allocator_; \ using TestEncodingBase::descr_; \ using TestEncodingBase::num_values_; \ @@ -216,14 +216,14 @@ class TestPlainEncoding : public TestEncodingBase { static constexpr int TYPE = Type::type_num; virtual void CheckRoundtrip() { - PlainEncoder encoder(descr_.get()); - PlainDecoder decoder(descr_.get()); - encoder.Put(draws_, num_values_); - encode_buffer_ = encoder.FlushValues(); - - decoder.SetData(num_values_, encode_buffer_->data(), - static_cast(encode_buffer_->size())); - int values_decoded = decoder.Decode(decode_buf_, num_values_); + auto encoder = MakeTypedEncoder(Encoding::PLAIN, false, descr_.get()); + auto decoder = MakeTypedDecoder(Encoding::PLAIN, descr_.get()); + encoder->Put(draws_, num_values_); + encode_buffer_ = encoder->FlushValues(); + + decoder->SetData(num_values_, encode_buffer_->data(), + static_cast(encode_buffer_->size())); + int values_decoded = decoder->Decode(decode_buf_, num_values_); ASSERT_EQ(num_values_, values_decoded); ASSERT_NO_FATAL_FAILURE(VerifyResults(decode_buf_, draws_, num_values_)); } @@ -252,29 +252,38 @@ class TestDictionaryEncoding : public TestEncodingBase { static constexpr int TYPE = Type::type_num; void CheckRoundtrip() { - std::vector valid_bits(BitUtil::BytesForBits(num_values_) + 1, 255); - DictEncoder encoder(descr_.get(), &pool_); + std::vector valid_bits(::arrow::BitUtil::BytesForBits(num_values_) + 1, 255); - ASSERT_NO_THROW(encoder.Put(draws_, num_values_)); - dict_buffer_ = AllocateBuffer(default_memory_pool(), encoder.dict_encoded_size()); - encoder.WriteDict(dict_buffer_->mutable_data()); - std::shared_ptr indices = encoder.FlushValues(); + auto base_encoder = MakeEncoder(Type::type_num, Encoding::PLAIN, true, descr_.get()); + auto encoder = + dynamic_cast::Encoder*>(base_encoder.get()); + auto dict_traits = dynamic_cast*>(base_encoder.get()); + + ASSERT_NO_THROW(encoder->Put(draws_, num_values_)); + dict_buffer_ = + AllocateBuffer(default_memory_pool(), dict_traits->dict_encoded_size()); + dict_traits->WriteDict(dict_buffer_->mutable_data()); + std::shared_ptr indices = encoder->FlushValues(); + + auto base_spaced_encoder = + MakeEncoder(Type::type_num, Encoding::PLAIN, true, descr_.get()); + auto spaced_encoder = + dynamic_cast::Encoder*>(base_spaced_encoder.get()); - DictEncoder spaced_encoder(descr_.get(), &pool_); // PutSpaced should lead to the same results - ASSERT_NO_THROW(spaced_encoder.PutSpaced(draws_, num_values_, valid_bits.data(), 0)); - std::shared_ptr indices_from_spaced = spaced_encoder.FlushValues(); + ASSERT_NO_THROW(spaced_encoder->PutSpaced(draws_, num_values_, valid_bits.data(), 0)); + std::shared_ptr indices_from_spaced = spaced_encoder->FlushValues(); ASSERT_TRUE(indices_from_spaced->Equals(*indices)); - PlainDecoder dict_decoder(descr_.get()); - dict_decoder.SetData(encoder.num_entries(), dict_buffer_->data(), - static_cast(dict_buffer_->size())); + auto dict_decoder = MakeTypedDecoder(Encoding::PLAIN, descr_.get()); + dict_decoder->SetData(dict_traits->num_entries(), dict_buffer_->data(), + static_cast(dict_buffer_->size())); - DictionaryDecoder decoder(descr_.get()); - decoder.SetDict(&dict_decoder); + auto decoder = MakeDictDecoder(descr_.get()); + decoder->SetDict(dict_decoder.get()); - decoder.SetData(num_values_, indices->data(), static_cast(indices->size())); - int values_decoded = decoder.Decode(decode_buf_, num_values_); + 
decoder->SetData(num_values_, indices->data(), static_cast(indices->size())); + int values_decoded = decoder->Decode(decode_buf_, num_values_); ASSERT_EQ(num_values_, values_decoded); // TODO(wesm): The DictionaryDecoder must stay alive because the decoded @@ -283,9 +292,9 @@ class TestDictionaryEncoding : public TestEncodingBase { ASSERT_NO_FATAL_FAILURE(VerifyResults(decode_buf_, draws_, num_values_)); // Also test spaced decoding - decoder.SetData(num_values_, indices->data(), static_cast(indices->size())); + decoder->SetData(num_values_, indices->data(), static_cast(indices->size())); values_decoded = - decoder.DecodeSpaced(decode_buf_, num_values_, 0, valid_bits.data(), 0); + decoder->DecodeSpaced(decode_buf_, num_values_, 0, valid_bits.data(), 0); ASSERT_EQ(num_values_, values_decoded); ASSERT_NO_FATAL_FAILURE(VerifyResults(decode_buf_, draws_, num_values_)); } @@ -302,10 +311,7 @@ TYPED_TEST(TestDictionaryEncoding, BasicRoundTrip) { } TEST(TestDictionaryEncoding, CannotDictDecodeBoolean) { - PlainDecoder dict_decoder(nullptr); - DictionaryDecoder decoder(nullptr); - - ASSERT_THROW(decoder.SetDict(&dict_decoder), ParquetException); + ASSERT_THROW(MakeDictDecoder(nullptr), ParquetException); } } // namespace test diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc new file mode 100644 index 0000000000000..da630671f7903 --- /dev/null +++ b/cpp/src/parquet/encoding.cc @@ -0,0 +1,1280 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "parquet/encoding.h" + +#include +#include +#include +#include +#include +#include + +#include "arrow/builder.h" +#include "arrow/status.h" +#include "arrow/util/bit-stream-utils.h" +#include "arrow/util/bit-util.h" +#include "arrow/util/hashing.h" +#include "arrow/util/logging.h" +#include "arrow/util/macros.h" +#include "arrow/util/rle-encoding.h" +#include "arrow/util/string_view.h" + +#include "parquet/exception.h" +#include "parquet/schema.h" +#include "parquet/types.h" +#include "parquet/util/memory.h" + +namespace parquet { + +namespace BitUtil = ::arrow::BitUtil; + +class EncoderImpl : virtual public Encoder { + public: + EncoderImpl(const ColumnDescriptor* descr, Encoding::type encoding, + ::arrow::MemoryPool* pool) + : descr_(descr), + encoding_(encoding), + pool_(pool), + type_length_(descr ? 
descr->type_length() : -1) {} + + Encoding::type encoding() const override { return encoding_; } + + ::arrow::MemoryPool* memory_pool() const override { return pool_; } + + protected: + // For accessing type-specific metadata, like FIXED_LEN_BYTE_ARRAY + const ColumnDescriptor* descr_; + const Encoding::type encoding_; + ::arrow::MemoryPool* pool_; + + /// Type length from descr + int type_length_; +}; + +// ---------------------------------------------------------------------- +// Plain encoder implementation + +template +class PlainEncoder : public EncoderImpl, virtual public TypedEncoder { + public: + using T = typename DType::c_type; + + explicit PlainEncoder(const ColumnDescriptor* descr, + ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()); + + int64_t EstimatedDataEncodedSize() override; + std::shared_ptr FlushValues() override; + + void Put(const T* buffer, int num_values) override; + + protected: + std::unique_ptr values_sink_; +}; + +template +PlainEncoder::PlainEncoder(const ColumnDescriptor* descr, + ::arrow::MemoryPool* pool) + : EncoderImpl(descr, Encoding::PLAIN, pool) { + values_sink_.reset(new InMemoryOutputStream(pool)); +} +template +int64_t PlainEncoder::EstimatedDataEncodedSize() { + return values_sink_->Tell(); +} + +template +std::shared_ptr PlainEncoder::FlushValues() { + std::shared_ptr buffer = values_sink_->GetBuffer(); + values_sink_.reset(new InMemoryOutputStream(this->pool_)); + return buffer; +} + +template +void PlainEncoder::Put(const T* buffer, int num_values) { + values_sink_->Write(reinterpret_cast(buffer), num_values * sizeof(T)); +} + +template <> +inline void PlainEncoder::Put(const ByteArray* src, int num_values) { + for (int i = 0; i < num_values; ++i) { + // Write the result to the output stream + values_sink_->Write(reinterpret_cast(&src[i].len), sizeof(uint32_t)); + if (src[i].len > 0) { + DCHECK(nullptr != src[i].ptr) << "Value ptr cannot be NULL"; + } + values_sink_->Write(reinterpret_cast(src[i].ptr), src[i].len); + } +} + +template <> +inline void PlainEncoder::Put(const FixedLenByteArray* src, int num_values) { + for (int i = 0; i < num_values; ++i) { + // Write the result to the output stream + if (descr_->type_length() > 0) { + DCHECK(nullptr != src[i].ptr) << "Value ptr cannot be NULL"; + } + values_sink_->Write(reinterpret_cast(src[i].ptr), + descr_->type_length()); + } +} + +class PlainByteArrayEncoder : public PlainEncoder, + virtual public ByteArrayEncoder { + public: + using BASE = PlainEncoder; + using BASE::PlainEncoder; +}; + +class PlainFLBAEncoder : public PlainEncoder, virtual public FLBAEncoder { + public: + using BASE = PlainEncoder; + using BASE::PlainEncoder; +}; + +class PlainBooleanEncoder : public EncoderImpl, + virtual public TypedEncoder, + virtual public BooleanEncoder { + public: + explicit PlainBooleanEncoder( + const ColumnDescriptor* descr, + ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()); + + int64_t EstimatedDataEncodedSize() override; + std::shared_ptr FlushValues() override; + + void Put(const bool* src, int num_values) override; + void Put(const std::vector& src, int num_values) override; + + private: + int bits_available_; + std::unique_ptr<::arrow::BitUtil::BitWriter> bit_writer_; + std::shared_ptr bits_buffer_; + std::unique_ptr values_sink_; + + template + void PutImpl(const SequenceType& src, int num_values); +}; + +template +void PlainBooleanEncoder::PutImpl(const SequenceType& src, int num_values) { + int bit_offset = 0; + if (bits_available_ > 0) { + int bits_to_write = 
std::min(bits_available_, num_values); + for (int i = 0; i < bits_to_write; i++) { + bit_writer_->PutValue(src[i], 1); + } + bits_available_ -= bits_to_write; + bit_offset = bits_to_write; + + if (bits_available_ == 0) { + bit_writer_->Flush(); + values_sink_->Write(bit_writer_->buffer(), bit_writer_->bytes_written()); + bit_writer_->Clear(); + } + } + + int bits_remaining = num_values - bit_offset; + while (bit_offset < num_values) { + bits_available_ = static_cast(bits_buffer_->size()) * 8; + + int bits_to_write = std::min(bits_available_, bits_remaining); + for (int i = bit_offset; i < bit_offset + bits_to_write; i++) { + bit_writer_->PutValue(src[i], 1); + } + bit_offset += bits_to_write; + bits_available_ -= bits_to_write; + bits_remaining -= bits_to_write; + + if (bits_available_ == 0) { + bit_writer_->Flush(); + values_sink_->Write(bit_writer_->buffer(), bit_writer_->bytes_written()); + bit_writer_->Clear(); + } + } +} + +PlainBooleanEncoder::PlainBooleanEncoder(const ColumnDescriptor* descr, + ::arrow::MemoryPool* pool) + : EncoderImpl(descr, Encoding::PLAIN, pool), + bits_available_(kInMemoryDefaultCapacity * 8), + bits_buffer_(AllocateBuffer(pool, kInMemoryDefaultCapacity)), + values_sink_(new InMemoryOutputStream(pool)) { + bit_writer_.reset(new BitUtil::BitWriter(bits_buffer_->mutable_data(), + static_cast(bits_buffer_->size()))); +} + +int64_t PlainBooleanEncoder::EstimatedDataEncodedSize() { + return values_sink_->Tell() + bit_writer_->bytes_written(); +} + +std::shared_ptr PlainBooleanEncoder::FlushValues() { + if (bits_available_ > 0) { + bit_writer_->Flush(); + values_sink_->Write(bit_writer_->buffer(), bit_writer_->bytes_written()); + bit_writer_->Clear(); + bits_available_ = static_cast(bits_buffer_->size()) * 8; + } + + std::shared_ptr buffer = values_sink_->GetBuffer(); + values_sink_.reset(new InMemoryOutputStream(this->pool_)); + return buffer; +} + +void PlainBooleanEncoder::Put(const bool* src, int num_values) { + PutImpl(src, num_values); +} + +void PlainBooleanEncoder::Put(const std::vector& src, int num_values) { + PutImpl(src, num_values); +} + +// ---------------------------------------------------------------------- +// DictEncoder implementations + +template +struct DictEncoderTraits { + using c_type = typename DType::c_type; + using MemoTableType = ::arrow::internal::ScalarMemoTable; +}; + +template <> +struct DictEncoderTraits { + using MemoTableType = ::arrow::internal::BinaryMemoTable; +}; + +template <> +struct DictEncoderTraits { + using MemoTableType = ::arrow::internal::BinaryMemoTable; +}; + +/// See the dictionary encoding section of https://github.com/Parquet/parquet-format. +/// The encoding supports streaming encoding. Values are encoded as they are added while +/// the dictionary is being constructed. At any time, the buffered values can be +/// written out with the current dictionary size. More values can then be added to +/// the encoder, including new dictionary entries. 
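/// Illustrative example (editorial note, not part of the original patch):
/// Put()ting the values 7, 9, 7, 7 buffers the indices 0, 1, 0, 0 and builds
/// the two-entry dictionary {7, 9}; WriteDict() then emits the dictionary
/// values and WriteIndices() emits the bit width followed by the
/// RLE/bit-packed indices.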
+template <typename DType>
+class DictEncoderImpl : public EncoderImpl, virtual public DictEncoder<DType> {
+  using MemoTableType = typename DictEncoderTraits<DType>::MemoTableType;
+
+ public:
+  typedef typename DType::c_type T;
+
+  explicit DictEncoderImpl(
+      const ColumnDescriptor* desc,
+      ::arrow::MemoryPool* allocator = ::arrow::default_memory_pool());
+
+  ~DictEncoderImpl() override { DCHECK(buffered_indices_.empty()); }
+
+  int dict_encoded_size() override { return dict_encoded_size_; }
+
+  int WriteIndices(uint8_t* buffer, int buffer_len) override {
+    // Write bit width in first byte
+    *buffer = static_cast<uint8_t>(bit_width());
+    ++buffer;
+    --buffer_len;
+
+    ::arrow::util::RleEncoder encoder(buffer, buffer_len, bit_width());
+    for (int index : buffered_indices_) {
+      if (!encoder.Put(index)) return -1;
+    }
+    encoder.Flush();
+
+    ClearIndices();
+    return 1 + encoder.len();
+  }
+
+  void set_type_length(int type_length) { this->type_length_ = type_length; }
+
+  /// Returns a conservative estimate of the number of bytes needed to encode
+  /// the buffered indices. Used to size the buffer passed to WriteIndices().
+  int64_t EstimatedDataEncodedSize() override;
+
+  /// The minimum bit width required to encode the currently buffered indices.
+  int bit_width() const override;
+
+  /// Encode value. Note that this does not actually write any data, just
+  /// buffers the value's index to be written later.
+  inline void Put(const T& value);
+  void Put(const T* values, int num_values) override;
+
+  std::shared_ptr<Buffer> FlushValues() override;
+
+  void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits,
+                 int64_t valid_bits_offset) override;
+
+  /// Writes out the encoded dictionary to buffer. buffer must be preallocated
+  /// to dict_encoded_size() bytes.
+  void WriteDict(uint8_t* buffer) override;
+
+  /// The number of entries in the dictionary.
+  int num_entries() const override { return memo_table_.size(); }
+
+ private:
+  /// Clears all the indices (but leaves the dictionary).
+  void ClearIndices() { buffered_indices_.clear(); }
+
+  /// Indices that have not yet been written out by WriteIndices().
+  std::vector<int> buffered_indices_;
+
+  /// The number of bytes needed to encode the dictionary.
+  int dict_encoded_size_;
+
+  MemoTableType memo_table_;
+};
+
+// Initially 1024 elements
+static constexpr int32_t INITIAL_HASH_TABLE_SIZE = 1 << 10;
+
+template <typename DType>
+DictEncoderImpl<DType>::DictEncoderImpl(const ColumnDescriptor* desc,
+                                        ::arrow::MemoryPool* pool)
+    : EncoderImpl(desc, Encoding::PLAIN_DICTIONARY, pool),
+      dict_encoded_size_(0),
+      memo_table_(INITIAL_HASH_TABLE_SIZE) {}
+
+template <typename DType>
+int64_t DictEncoderImpl<DType>::EstimatedDataEncodedSize() {
+  // Note: because of the way RleEncoder::CheckBufferFull() is called, we have
+  // to reserve an extra "RleEncoder::MinBufferSize" bytes. These extra bytes
+  // won't be used but not reserving them would cause the encoder to fail.
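// Illustrative note (editorial, not part of the original patch): the estimate
// below is 1 byte for the bit-width prefix that WriteIndices() emits, plus the
// RLE/bit-packed worst case for the buffered indices, plus the
// MinBufferSize() reserve described above.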
+ return 1 + + ::arrow::util::RleEncoder::MaxBufferSize( + bit_width(), static_cast(buffered_indices_.size())) + + ::arrow::util::RleEncoder::MinBufferSize(bit_width()); +} + +template +int DictEncoderImpl::bit_width() const { + if (ARROW_PREDICT_FALSE(num_entries() == 0)) return 0; + if (ARROW_PREDICT_FALSE(num_entries() == 1)) return 1; + return BitUtil::Log2(num_entries()); +} + +template +std::shared_ptr DictEncoderImpl::FlushValues() { + std::shared_ptr buffer = + AllocateBuffer(this->pool_, EstimatedDataEncodedSize()); + int result_size = + WriteIndices(buffer->mutable_data(), static_cast(EstimatedDataEncodedSize())); + PARQUET_THROW_NOT_OK(buffer->Resize(result_size, false)); + return std::move(buffer); +} + +template +void DictEncoderImpl::Put(const T* src, int num_values) { + for (int32_t i = 0; i < num_values; i++) { + Put(src[i]); + } +} + +template +void DictEncoderImpl::PutSpaced(const T* src, int num_values, + const uint8_t* valid_bits, + int64_t valid_bits_offset) { + ::arrow::internal::BitmapReader valid_bits_reader(valid_bits, valid_bits_offset, + num_values); + for (int32_t i = 0; i < num_values; i++) { + if (valid_bits_reader.IsSet()) { + Put(src[i]); + } + valid_bits_reader.Next(); + } +} + +template +void DictEncoderImpl::WriteDict(uint8_t* buffer) { + // For primitive types, only a memcpy + DCHECK_EQ(static_cast(dict_encoded_size_), sizeof(T) * memo_table_.size()); + memo_table_.CopyValues(0 /* start_pos */, reinterpret_cast(buffer)); +} + +// ByteArray and FLBA already have the dictionary encoded in their data heaps +template <> +void DictEncoderImpl::WriteDict(uint8_t* buffer) { + memo_table_.VisitValues(0, [&](const ::arrow::util::string_view& v) { + uint32_t len = static_cast(v.length()); + memcpy(buffer, &len, sizeof(uint32_t)); + buffer += sizeof(uint32_t); + memcpy(buffer, v.data(), v.length()); + buffer += v.length(); + }); +} + +template <> +void DictEncoderImpl::WriteDict(uint8_t* buffer) { + memo_table_.VisitValues(0, [&](const ::arrow::util::string_view& v) { + DCHECK_EQ(v.length(), static_cast(type_length_)); + memcpy(buffer, v.data(), type_length_); + buffer += type_length_; + }); +} + +template +inline void DictEncoderImpl::Put(const T& v) { + // Put() implementation for primitive types + auto on_found = [](int32_t memo_index) {}; + auto on_not_found = [this](int32_t memo_index) { + dict_encoded_size_ += static_cast(sizeof(T)); + }; + + auto memo_index = memo_table_.GetOrInsert(v, on_found, on_not_found); + buffered_indices_.push_back(memo_index); +} + +template <> +inline void DictEncoderImpl::Put(const ByteArray& v) { + static const uint8_t empty[] = {0}; + + auto on_found = [](int32_t memo_index) {}; + auto on_not_found = [&](int32_t memo_index) { + dict_encoded_size_ += static_cast(v.len + sizeof(uint32_t)); + }; + + DCHECK(v.ptr != nullptr || v.len == 0); + const void* ptr = (v.ptr != nullptr) ? v.ptr : empty; + auto memo_index = + memo_table_.GetOrInsert(ptr, static_cast(v.len), on_found, on_not_found); + buffered_indices_.push_back(memo_index); +} + +template <> +inline void DictEncoderImpl::Put(const FixedLenByteArray& v) { + static const uint8_t empty[] = {0}; + + auto on_found = [](int32_t memo_index) {}; + auto on_not_found = [this](int32_t memo_index) { dict_encoded_size_ += type_length_; }; + + DCHECK(v.ptr != nullptr || type_length_ == 0); + const void* ptr = (v.ptr != nullptr) ? 
v.ptr : empty; + auto memo_index = memo_table_.GetOrInsert(ptr, type_length_, on_found, on_not_found); + buffered_indices_.push_back(memo_index); +} + +class DictByteArrayEncoder : public DictEncoderImpl, + virtual public ByteArrayEncoder { + public: + using BASE = DictEncoderImpl; + using BASE::DictEncoderImpl; +}; + +class DictFLBAEncoder : public DictEncoderImpl, virtual public FLBAEncoder { + public: + using BASE = DictEncoderImpl; + using BASE::DictEncoderImpl; +}; + +// ---------------------------------------------------------------------- +// Encoder and decoder factory functions + +std::unique_ptr MakeEncoder(Type::type type_num, Encoding::type encoding, + bool use_dictionary, const ColumnDescriptor* descr, + ::arrow::MemoryPool* pool) { + if (use_dictionary) { + switch (type_num) { + case Type::INT32: + return std::unique_ptr(new DictEncoderImpl(descr, pool)); + case Type::INT64: + return std::unique_ptr(new DictEncoderImpl(descr, pool)); + case Type::INT96: + return std::unique_ptr(new DictEncoderImpl(descr, pool)); + case Type::FLOAT: + return std::unique_ptr(new DictEncoderImpl(descr, pool)); + case Type::DOUBLE: + return std::unique_ptr(new DictEncoderImpl(descr, pool)); + case Type::BYTE_ARRAY: + return std::unique_ptr(new DictByteArrayEncoder(descr, pool)); + case Type::FIXED_LEN_BYTE_ARRAY: + return std::unique_ptr(new DictFLBAEncoder(descr, pool)); + default: + DCHECK(false) << "Encoder not implemented"; + break; + } + } else if (encoding == Encoding::PLAIN) { + switch (type_num) { + case Type::BOOLEAN: + return std::unique_ptr(new PlainBooleanEncoder(descr, pool)); + case Type::INT32: + return std::unique_ptr(new PlainEncoder(descr, pool)); + case Type::INT64: + return std::unique_ptr(new PlainEncoder(descr, pool)); + case Type::INT96: + return std::unique_ptr(new PlainEncoder(descr, pool)); + case Type::FLOAT: + return std::unique_ptr(new PlainEncoder(descr, pool)); + case Type::DOUBLE: + return std::unique_ptr(new PlainEncoder(descr, pool)); + case Type::BYTE_ARRAY: + return std::unique_ptr(new PlainByteArrayEncoder(descr, pool)); + case Type::FIXED_LEN_BYTE_ARRAY: + return std::unique_ptr(new PlainFLBAEncoder(descr, pool)); + default: + DCHECK(false) << "Encoder not implemented"; + break; + } + } else { + ParquetException::NYI("Selected encoding is not supported"); + } + DCHECK(false) << "Should not be able to reach this code"; + return nullptr; +} + +class DecoderImpl : virtual public Decoder { + public: + void SetData(int num_values, const uint8_t* data, int len) override { + num_values_ = num_values; + data_ = data; + len_ = len; + } + + int values_left() const override { return num_values_; } + Encoding::type encoding() const override { return encoding_; } + + protected: + explicit DecoderImpl(const ColumnDescriptor* descr, Encoding::type encoding) + : descr_(descr), encoding_(encoding), num_values_(0), data_(NULLPTR), len_(0) {} + + // For accessing type-specific metadata, like FIXED_LEN_BYTE_ARRAY + const ColumnDescriptor* descr_; + + const Encoding::type encoding_; + int num_values_; + const uint8_t* data_; + int len_; + int type_length_; +}; + +template +class PlainDecoder : public DecoderImpl, virtual public TypedDecoder { + public: + using T = typename DType::c_type; + explicit PlainDecoder(const ColumnDescriptor* descr); + + int Decode(T* buffer, int max_values) override; +}; + +template +PlainDecoder::PlainDecoder(const ColumnDescriptor* descr) + : DecoderImpl(descr, Encoding::PLAIN) { + if (descr_ && descr_->physical_type() == Type::FIXED_LEN_BYTE_ARRAY) { 
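// Descriptive comment (editorial): FIXED_LEN_BYTE_ARRAY is the only physical
// type whose per-value width comes from the schema, so it is cached here for
// DecodePlain() to slice the input buffer; -1 marks it unused for other types.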
+ type_length_ = descr_->type_length(); + } else { + type_length_ = -1; + } +} + +// Decode routine templated on C++ type rather than type enum +template +inline int DecodePlain(const uint8_t* data, int64_t data_size, int num_values, + int type_length, T* out) { + int bytes_to_decode = num_values * static_cast(sizeof(T)); + if (data_size < bytes_to_decode) { + ParquetException::EofException(); + } + // If bytes_to_decode == 0, data could be null + if (bytes_to_decode > 0) { + memcpy(out, data, bytes_to_decode); + } + return bytes_to_decode; +} + +// Template specialization for BYTE_ARRAY. The written values do not own their +// own data. +template <> +inline int DecodePlain(const uint8_t* data, int64_t data_size, int num_values, + int type_length, ByteArray* out) { + int bytes_decoded = 0; + int increment; + for (int i = 0; i < num_values; ++i) { + uint32_t len = out[i].len = *reinterpret_cast(data); + increment = static_cast(sizeof(uint32_t) + len); + if (data_size < increment) ParquetException::EofException(); + out[i].ptr = data + sizeof(uint32_t); + data += increment; + data_size -= increment; + bytes_decoded += increment; + } + return bytes_decoded; +} + +// Template specialization for FIXED_LEN_BYTE_ARRAY. The written values do not +// own their own data. +template <> +inline int DecodePlain(const uint8_t* data, int64_t data_size, + int num_values, int type_length, + FixedLenByteArray* out) { + int bytes_to_decode = type_length * num_values; + if (data_size < bytes_to_decode) { + ParquetException::EofException(); + } + for (int i = 0; i < num_values; ++i) { + out[i].ptr = data; + data += type_length; + data_size -= type_length; + } + return bytes_to_decode; +} + +template +int PlainDecoder::Decode(T* buffer, int max_values) { + max_values = std::min(max_values, num_values_); + int bytes_consumed = DecodePlain(data_, len_, max_values, type_length_, buffer); + data_ += bytes_consumed; + len_ -= bytes_consumed; + num_values_ -= max_values; + return max_values; +} + +class PlainBooleanDecoder : public DecoderImpl, + virtual public TypedDecoder, + virtual public BooleanDecoder { + public: + explicit PlainBooleanDecoder(const ColumnDescriptor* descr); + void SetData(int num_values, const uint8_t* data, int len) override; + + // Two flavors of bool decoding + int Decode(uint8_t* buffer, int max_values) override; + int Decode(bool* buffer, int max_values) override; + + private: + std::unique_ptr<::arrow::BitUtil::BitReader> bit_reader_; +}; + +PlainBooleanDecoder::PlainBooleanDecoder(const ColumnDescriptor* descr) + : DecoderImpl(descr, Encoding::PLAIN) {} + +void PlainBooleanDecoder::SetData(int num_values, const uint8_t* data, int len) { + num_values_ = num_values; + bit_reader_.reset(new BitUtil::BitReader(data, len)); +} + +int PlainBooleanDecoder::Decode(uint8_t* buffer, int max_values) { + max_values = std::min(max_values, num_values_); + bool val; + ::arrow::internal::BitmapWriter bit_writer(buffer, 0, max_values); + for (int i = 0; i < max_values; ++i) { + if (!bit_reader_->GetValue(1, &val)) { + ParquetException::EofException(); + } + if (val) { + bit_writer.Set(); + } + bit_writer.Next(); + } + bit_writer.Finish(); + num_values_ -= max_values; + return max_values; +} + +int PlainBooleanDecoder::Decode(bool* buffer, int max_values) { + max_values = std::min(max_values, num_values_); + if (bit_reader_->GetBatch(1, buffer, max_values) != max_values) { + ParquetException::EofException(); + } + num_values_ -= max_values; + return max_values; +} + +class PlainByteArrayDecoder : public 
PlainDecoder, + virtual public ByteArrayDecoder { + public: + using Base = PlainDecoder; + using Base::DecodeSpaced; + using Base::PlainDecoder; + + int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits, + int64_t valid_bits_offset, + ::arrow::internal::ChunkedBinaryBuilder* out) override { + int result = 0; + PARQUET_THROW_NOT_OK( + DecodeArrow(num_values, null_count, valid_bits, valid_bits_offset, out, &result)); + return result; + } + + int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits, + int64_t valid_bits_offset, + ::arrow::BinaryDictionaryBuilder* out) override { + int result = 0; + PARQUET_THROW_NOT_OK( + DecodeArrow(num_values, null_count, valid_bits, valid_bits_offset, out, &result)); + return result; + } + + int DecodeArrowNonNull(int num_values, + ::arrow::internal::ChunkedBinaryBuilder* out) override { + int result = 0; + PARQUET_THROW_NOT_OK(DecodeArrowNonNull(num_values, out, &result)); + return result; + } + + private: + template + ::arrow::Status DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits, + int64_t valid_bits_offset, BuilderType* out, + int* values_decoded) { + num_values = std::min(num_values, num_values_); + + ARROW_RETURN_NOT_OK(out->Reserve(num_values)); + + ::arrow::internal::BitmapReader bit_reader(valid_bits, valid_bits_offset, num_values); + int increment; + int i = 0; + const uint8_t* data = data_; + int64_t data_size = len_; + int bytes_decoded = 0; + while (i < num_values) { + if (bit_reader.IsSet()) { + uint32_t len = *reinterpret_cast(data); + increment = static_cast(sizeof(uint32_t) + len); + if (data_size < increment) { + ParquetException::EofException(); + } + ARROW_RETURN_NOT_OK(out->Append(data + sizeof(uint32_t), len)); + data += increment; + data_size -= increment; + bytes_decoded += increment; + ++i; + } else { + ARROW_RETURN_NOT_OK(out->AppendNull()); + } + bit_reader.Next(); + } + + data_ += bytes_decoded; + len_ -= bytes_decoded; + num_values_ -= num_values; + *values_decoded = num_values; + return ::arrow::Status::OK(); + } + + ::arrow::Status DecodeArrowNonNull(int num_values, + ::arrow::internal::ChunkedBinaryBuilder* out, + int* values_decoded) { + num_values = std::min(num_values, num_values_); + ARROW_RETURN_NOT_OK(out->Reserve(num_values)); + int i = 0; + const uint8_t* data = data_; + int64_t data_size = len_; + int bytes_decoded = 0; + while (i < num_values) { + uint32_t len = *reinterpret_cast(data); + int increment = static_cast(sizeof(uint32_t) + len); + if (data_size < increment) ParquetException::EofException(); + ARROW_RETURN_NOT_OK(out->Append(data + sizeof(uint32_t), len)); + data += increment; + data_size -= increment; + bytes_decoded += increment; + } + + data_ += bytes_decoded; + len_ -= bytes_decoded; + num_values_ -= num_values; + *values_decoded = num_values; + return ::arrow::Status::OK(); + } +}; + +class PlainFLBADecoder : public PlainDecoder, virtual public FLBADecoder { + public: + using Base = PlainDecoder; + using Base::PlainDecoder; +}; + +// ---------------------------------------------------------------------- +// Dictionary encoding and decoding + +template +class DictDecoderImpl : public DecoderImpl, virtual public DictDecoder { + public: + typedef typename Type::c_type T; + + // Initializes the dictionary with values from 'dictionary'. The data in + // dictionary is not guaranteed to persist in memory after this call so the + // dictionary decoder needs to copy the data out if necessary. 
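// Note (editorial): SetData() below consumes the stream produced by
// DictEncoderImpl::WriteIndices(): a single leading byte giving the bit
// width, followed by the RLE/bit-packed dictionary indices.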
+ explicit DictDecoderImpl(const ColumnDescriptor* descr, + ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) + : DecoderImpl(descr, Encoding::RLE_DICTIONARY), + dictionary_(0, pool), + byte_array_data_(AllocateBuffer(pool, 0)) {} + + // Perform type-specific initiatialization + void SetDict(TypedDecoder* dictionary) override; + + void SetData(int num_values, const uint8_t* data, int len) override { + num_values_ = num_values; + if (len == 0) return; + uint8_t bit_width = *data; + ++data; + --len; + idx_decoder_ = ::arrow::util::RleDecoder(data, len, bit_width); + } + + int Decode(T* buffer, int max_values) override { + max_values = std::min(max_values, num_values_); + int decoded_values = + idx_decoder_.GetBatchWithDict(dictionary_.data(), buffer, max_values); + if (decoded_values != max_values) { + ParquetException::EofException(); + } + num_values_ -= max_values; + return max_values; + } + + int DecodeSpaced(T* buffer, int num_values, int null_count, const uint8_t* valid_bits, + int64_t valid_bits_offset) override { + int decoded_values = + idx_decoder_.GetBatchWithDictSpaced(dictionary_.data(), buffer, num_values, + null_count, valid_bits, valid_bits_offset); + if (decoded_values != num_values) { + ParquetException::EofException(); + } + return decoded_values; + } + + protected: + // Only one is set. + Vector dictionary_; + + // Data that contains the byte array data (byte_array_dictionary_ just has the + // pointers). + std::shared_ptr byte_array_data_; + + ::arrow::util::RleDecoder idx_decoder_; +}; + +template +inline void DictDecoderImpl::SetDict(TypedDecoder* dictionary) { + int num_dictionary_values = dictionary->values_left(); + dictionary_.Resize(num_dictionary_values); + dictionary->Decode(dictionary_.data(), num_dictionary_values); +} + +template <> +inline void DictDecoderImpl::SetDict(TypedDecoder* dictionary) { + ParquetException::NYI("Dictionary encoding is not implemented for boolean values"); +} + +template <> +inline void DictDecoderImpl::SetDict( + TypedDecoder* dictionary) { + int num_dictionary_values = dictionary->values_left(); + dictionary_.Resize(num_dictionary_values); + dictionary->Decode(&dictionary_[0], num_dictionary_values); + + int total_size = 0; + for (int i = 0; i < num_dictionary_values; ++i) { + total_size += dictionary_[i].len; + } + if (total_size > 0) { + PARQUET_THROW_NOT_OK(byte_array_data_->Resize(total_size, false)); + } + + int offset = 0; + uint8_t* bytes_data = byte_array_data_->mutable_data(); + for (int i = 0; i < num_dictionary_values; ++i) { + memcpy(bytes_data + offset, dictionary_[i].ptr, dictionary_[i].len); + dictionary_[i].ptr = bytes_data + offset; + offset += dictionary_[i].len; + } +} + +template <> +inline void DictDecoderImpl::SetDict(TypedDecoder* dictionary) { + int num_dictionary_values = dictionary->values_left(); + dictionary_.Resize(num_dictionary_values); + dictionary->Decode(&dictionary_[0], num_dictionary_values); + + int fixed_len = descr_->type_length(); + int total_size = num_dictionary_values * fixed_len; + + PARQUET_THROW_NOT_OK(byte_array_data_->Resize(total_size, false)); + uint8_t* bytes_data = byte_array_data_->mutable_data(); + for (int32_t i = 0, offset = 0; i < num_dictionary_values; ++i, offset += fixed_len) { + memcpy(bytes_data + offset, dictionary_[i].ptr, fixed_len); + dictionary_[i].ptr = bytes_data + offset; + } +} + +class DictByteArrayDecoder : public DictDecoderImpl, + virtual public ByteArrayDecoder { + public: + using BASE = DictDecoderImpl; + using BASE::DictDecoderImpl; + + int 
DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits, + int64_t valid_bits_offset, + ::arrow::internal::ChunkedBinaryBuilder* out) override { + int result = 0; + PARQUET_THROW_NOT_OK( + DecodeArrow(num_values, null_count, valid_bits, valid_bits_offset, out, &result)); + return result; + } + + int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits, + int64_t valid_bits_offset, + ::arrow::BinaryDictionaryBuilder* out) override { + int result = 0; + PARQUET_THROW_NOT_OK( + DecodeArrow(num_values, null_count, valid_bits, valid_bits_offset, out, &result)); + return result; + } + + int DecodeArrowNonNull(int num_values, + ::arrow::internal::ChunkedBinaryBuilder* out) override { + int result = 0; + PARQUET_THROW_NOT_OK(DecodeArrowNonNull(num_values, out, &result)); + return result; + } + + private: + template + ::arrow::Status DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits, + int64_t valid_bits_offset, BuilderType* builder, + int* out_num_values) { + constexpr int32_t buffer_size = 1024; + int32_t indices_buffer[buffer_size]; + + ::arrow::internal::BitmapReader bit_reader(valid_bits, valid_bits_offset, num_values); + + int values_decoded = 0; + while (values_decoded < num_values) { + bool is_valid = bit_reader.IsSet(); + bit_reader.Next(); + + if (is_valid) { + int32_t batch_size = + std::min(buffer_size, num_values - values_decoded - null_count); + int num_indices = idx_decoder_.GetBatch(indices_buffer, batch_size); + + int i = 0; + while (true) { + // Consume all indices + if (is_valid) { + const auto& val = dictionary_[indices_buffer[i]]; + ARROW_RETURN_NOT_OK(builder->Append(val.ptr, val.len)); + ++i; + } else { + ARROW_RETURN_NOT_OK(builder->AppendNull()); + --null_count; + } + ++values_decoded; + if (i == num_indices) { + // Do not advance the bit_reader if we have fulfilled the decode + // request + break; + } + is_valid = bit_reader.IsSet(); + bit_reader.Next(); + } + } else { + ARROW_RETURN_NOT_OK(builder->AppendNull()); + --null_count; + ++values_decoded; + } + } + if (values_decoded != num_values) { + return ::arrow::Status::IOError("Expected to dictionary-decode ", num_values, + " but only able to decode ", values_decoded); + } + *out_num_values = values_decoded; + return ::arrow::Status::OK(); + } + + template + ::arrow::Status DecodeArrowNonNull(int num_values, BuilderType* builder, + int* out_num_values) { + constexpr int32_t buffer_size = 2048; + int32_t indices_buffer[buffer_size]; + int values_decoded = 0; + while (values_decoded < num_values) { + int num_indices = idx_decoder_.GetBatch(indices_buffer, buffer_size); + if (num_indices == 0) break; + for (int i = 0; i < num_indices; ++i) { + const auto& val = dictionary_[indices_buffer[i]]; + PARQUET_THROW_NOT_OK(builder->Append(val.ptr, val.len)); + } + values_decoded += num_indices; + } + if (values_decoded != num_values) { + ParquetException::EofException(); + } + *out_num_values = values_decoded; + return ::arrow::Status::OK(); + } +}; + +class DictFLBADecoder : public DictDecoderImpl, virtual public FLBADecoder { + public: + using BASE = DictDecoderImpl; + using BASE::DictDecoderImpl; +}; + +// ---------------------------------------------------------------------- +// DeltaBitPackDecoder + +template +class DeltaBitPackDecoder : public DecoderImpl, virtual public TypedDecoder { + public: + typedef typename DType::c_type T; + + explicit DeltaBitPackDecoder(const ColumnDescriptor* descr, + ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) + : DecoderImpl(descr, 
Encoding::DELTA_BINARY_PACKED), pool_(pool) { + if (DType::type_num != Type::INT32 && DType::type_num != Type::INT64) { + throw ParquetException("Delta bit pack encoding should only be for integer data."); + } + } + + virtual void SetData(int num_values, const uint8_t* data, int len) { + this->num_values_ = num_values; + decoder_ = ::arrow::BitUtil::BitReader(data, len); + values_current_block_ = 0; + values_current_mini_block_ = 0; + } + + virtual int Decode(T* buffer, int max_values) { + return GetInternal(buffer, max_values); + } + + private: + void InitBlock() { + int32_t block_size; + if (!decoder_.GetVlqInt(&block_size)) ParquetException::EofException(); + if (!decoder_.GetVlqInt(&num_mini_blocks_)) ParquetException::EofException(); + if (!decoder_.GetVlqInt(&values_current_block_)) { + ParquetException::EofException(); + } + if (!decoder_.GetZigZagVlqInt(&last_value_)) ParquetException::EofException(); + + delta_bit_widths_ = AllocateBuffer(pool_, num_mini_blocks_); + uint8_t* bit_width_data = delta_bit_widths_->mutable_data(); + + if (!decoder_.GetZigZagVlqInt(&min_delta_)) ParquetException::EofException(); + for (int i = 0; i < num_mini_blocks_; ++i) { + if (!decoder_.GetAligned(1, bit_width_data + i)) { + ParquetException::EofException(); + } + } + values_per_mini_block_ = block_size / num_mini_blocks_; + mini_block_idx_ = 0; + delta_bit_width_ = bit_width_data[0]; + values_current_mini_block_ = values_per_mini_block_; + } + + template + int GetInternal(T* buffer, int max_values) { + max_values = std::min(max_values, this->num_values_); + const uint8_t* bit_width_data = delta_bit_widths_->data(); + for (int i = 0; i < max_values; ++i) { + if (ARROW_PREDICT_FALSE(values_current_mini_block_ == 0)) { + ++mini_block_idx_; + if (mini_block_idx_ < static_cast(delta_bit_widths_->size())) { + delta_bit_width_ = bit_width_data[mini_block_idx_]; + values_current_mini_block_ = values_per_mini_block_; + } else { + InitBlock(); + buffer[i] = last_value_; + continue; + } + } + + // TODO: the key to this algorithm is to decode the entire miniblock at once. 
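// Illustrative step (editorial, not part of the original patch): with
// min_delta_ = -3 and a stored bit-packed value of 5, the decoded delta is
// 5 + (-3) = 2, so the next output is last_value_ + 2.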
+ int64_t delta; + if (!decoder_.GetValue(delta_bit_width_, &delta)) ParquetException::EofException(); + delta += min_delta_; + last_value_ += static_cast(delta); + buffer[i] = last_value_; + --values_current_mini_block_; + } + this->num_values_ -= max_values; + return max_values; + } + + ::arrow::MemoryPool* pool_; + ::arrow::BitUtil::BitReader decoder_; + int32_t values_current_block_; + int32_t num_mini_blocks_; + uint64_t values_per_mini_block_; + uint64_t values_current_mini_block_; + + int32_t min_delta_; + size_t mini_block_idx_; + std::shared_ptr delta_bit_widths_; + int delta_bit_width_; + + int32_t last_value_; +}; + +// ---------------------------------------------------------------------- +// DELTA_LENGTH_BYTE_ARRAY + +class DeltaLengthByteArrayDecoder : public DecoderImpl, + virtual public TypedDecoder { + public: + explicit DeltaLengthByteArrayDecoder( + const ColumnDescriptor* descr, + ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) + : DecoderImpl(descr, Encoding::DELTA_LENGTH_BYTE_ARRAY), + len_decoder_(nullptr, pool) {} + + virtual void SetData(int num_values, const uint8_t* data, int len) { + num_values_ = num_values; + if (len == 0) return; + int total_lengths_len = *reinterpret_cast(data); + data += 4; + this->len_decoder_.SetData(num_values, data, total_lengths_len); + data_ = data + total_lengths_len; + this->len_ = len - 4 - total_lengths_len; + } + + virtual int Decode(ByteArray* buffer, int max_values) { + max_values = std::min(max_values, num_values_); + std::vector lengths(max_values); + len_decoder_.Decode(lengths.data(), max_values); + for (int i = 0; i < max_values; ++i) { + buffer[i].len = lengths[i]; + buffer[i].ptr = data_; + this->data_ += lengths[i]; + this->len_ -= lengths[i]; + } + this->num_values_ -= max_values; + return max_values; + } + + private: + DeltaBitPackDecoder len_decoder_; +}; + +// ---------------------------------------------------------------------- +// DELTA_BYTE_ARRAY + +class DeltaByteArrayDecoder : public DecoderImpl, + virtual public TypedDecoder { + public: + explicit DeltaByteArrayDecoder( + const ColumnDescriptor* descr, + ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) + : DecoderImpl(descr, Encoding::DELTA_BYTE_ARRAY), + prefix_len_decoder_(nullptr, pool), + suffix_decoder_(nullptr, pool), + last_value_(0, nullptr) {} + + virtual void SetData(int num_values, const uint8_t* data, int len) { + num_values_ = num_values; + if (len == 0) return; + int prefix_len_length = *reinterpret_cast(data); + data += 4; + len -= 4; + prefix_len_decoder_.SetData(num_values, data, prefix_len_length); + data += prefix_len_length; + len -= prefix_len_length; + suffix_decoder_.SetData(num_values, data, len); + } + + // TODO: this doesn't work and requires memory management. We need to allocate + // new strings to store the results. 
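// Note (editorial): each Decode() call below malloc()s a result buffer per
// value and nothing ever frees it; that leak is the memory-management problem
// the TODO above refers to.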
+ virtual int Decode(ByteArray* buffer, int max_values) { + max_values = std::min(max_values, this->num_values_); + for (int i = 0; i < max_values; ++i) { + int prefix_len = 0; + prefix_len_decoder_.Decode(&prefix_len, 1); + ByteArray suffix = {0, nullptr}; + suffix_decoder_.Decode(&suffix, 1); + buffer[i].len = prefix_len + suffix.len; + + uint8_t* result = reinterpret_cast(malloc(buffer[i].len)); + memcpy(result, last_value_.ptr, prefix_len); + memcpy(result + prefix_len, suffix.ptr, suffix.len); + + buffer[i].ptr = result; + last_value_ = buffer[i]; + } + this->num_values_ -= max_values; + return max_values; + } + + private: + DeltaBitPackDecoder prefix_len_decoder_; + DeltaLengthByteArrayDecoder suffix_decoder_; + ByteArray last_value_; +}; + +// ---------------------------------------------------------------------- + +std::unique_ptr MakeDecoder(Type::type type_num, Encoding::type encoding, + const ColumnDescriptor* descr) { + if (encoding == Encoding::PLAIN) { + switch (type_num) { + case Type::BOOLEAN: + return std::unique_ptr(new PlainBooleanDecoder(descr)); + case Type::INT32: + return std::unique_ptr(new PlainDecoder(descr)); + case Type::INT64: + return std::unique_ptr(new PlainDecoder(descr)); + case Type::INT96: + return std::unique_ptr(new PlainDecoder(descr)); + case Type::FLOAT: + return std::unique_ptr(new PlainDecoder(descr)); + case Type::DOUBLE: + return std::unique_ptr(new PlainDecoder(descr)); + case Type::BYTE_ARRAY: + return std::unique_ptr(new PlainByteArrayDecoder(descr)); + case Type::FIXED_LEN_BYTE_ARRAY: + return std::unique_ptr(new PlainFLBADecoder(descr)); + default: + break; + } + } else { + ParquetException::NYI("Selected encoding is not supported"); + } + DCHECK(false) << "Should not be able to reach this code"; + return nullptr; +} + +namespace detail { + +std::unique_ptr MakeDictDecoder(Type::type type_num, + const ColumnDescriptor* descr, + ::arrow::MemoryPool* pool) { + switch (type_num) { + case Type::BOOLEAN: + ParquetException::NYI("Dictionary encoding not implemented for boolean type"); + case Type::INT32: + return std::unique_ptr(new DictDecoderImpl(descr, pool)); + case Type::INT64: + return std::unique_ptr(new DictDecoderImpl(descr, pool)); + case Type::INT96: + return std::unique_ptr(new DictDecoderImpl(descr, pool)); + case Type::FLOAT: + return std::unique_ptr(new DictDecoderImpl(descr, pool)); + case Type::DOUBLE: + return std::unique_ptr(new DictDecoderImpl(descr, pool)); + case Type::BYTE_ARRAY: + return std::unique_ptr(new DictByteArrayDecoder(descr, pool)); + case Type::FIXED_LEN_BYTE_ARRAY: + return std::unique_ptr(new DictFLBADecoder(descr, pool)); + default: + break; + } + DCHECK(false) << "Should not be able to reach this code"; + return nullptr; +} + +} // namespace detail + +} // namespace parquet diff --git a/cpp/src/parquet/encoding.h b/cpp/src/parquet/encoding.h index 006f22f2d114a..046296cdb1445 100644 --- a/cpp/src/parquet/encoding.h +++ b/cpp/src/parquet/encoding.h @@ -15,50 +15,66 @@ // specific language governing permissions and limitations // under the License. 
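// Overview (editorial note, inferred from the declarations that follow): this
// header now exposes an untyped Encoder/Decoder base pair, typed
// TypedEncoder/TypedDecoder subclasses, DictEncoder/DictDecoder interfaces,
// and factory functions, while the concrete codec classes live in encoding.cc.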
-#ifndef PARQUET_ENCODING_H -#define PARQUET_ENCODING_H +#pragma once #include +#include #include -#include +#include -#include "arrow/status.h" +#include "arrow/buffer.h" +#include "arrow/memory_pool.h" #include "arrow/util/bit-util.h" +#include "arrow/util/macros.h" #include "parquet/exception.h" -#include "parquet/schema.h" #include "parquet/types.h" #include "parquet/util/memory.h" +#include "parquet/util/visibility.h" + +namespace arrow { + +class BinaryDictionaryBuilder; + +namespace internal { + +class ChunkedBinaryBuilder; + +} // namespace internal +} // namespace arrow namespace parquet { class ColumnDescriptor; +// Untyped base for all encoders +class Encoder { + public: + virtual ~Encoder() = default; + + virtual int64_t EstimatedDataEncodedSize() = 0; + virtual std::shared_ptr FlushValues() = 0; + virtual Encoding::type encoding() const = 0; + + virtual ::arrow::MemoryPool* memory_pool() const = 0; +}; + // Base class for value encoders. Since encoders may or not have state (e.g., // dictionary encoding) we use a class instance to maintain any state. // // TODO(wesm): Encode interface API is temporary template -class Encoder { +class TypedEncoder : virtual public Encoder { public: typedef typename DType::c_type T; - virtual ~Encoder() {} - - virtual int64_t EstimatedDataEncodedSize() = 0; - virtual std::shared_ptr FlushValues() = 0; virtual void Put(const T* src, int num_values) = 0; + virtual void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits, int64_t valid_bits_offset) { std::shared_ptr buffer; - auto status = - ::arrow::AllocateResizableBuffer(pool_, num_values * sizeof(T), &buffer); - if (!status.ok()) { - std::ostringstream ss; - ss << "AllocateResizableBuffer failed in Encoder.PutSpaced in " << __FILE__ - << ", on line " << __LINE__; - throw ParquetException(ss.str()); - } + PARQUET_THROW_NOT_OK(::arrow::AllocateResizableBuffer( + this->memory_pool(), num_values * sizeof(T), &buffer)); int32_t num_valid_values = 0; ::arrow::internal::BitmapReader valid_bits_reader(valid_bits, valid_bits_offset, num_values); @@ -71,32 +87,53 @@ class Encoder { } Put(data, num_valid_values); } +}; + +// Base class for dictionary encoders +template +class DictEncoder : virtual public TypedEncoder { + public: + /// Writes out any buffered indices to buffer preceded by the bit width of this data. + /// Returns the number of bytes written. + /// If the supplied buffer is not big enough, returns -1. + /// buffer must be preallocated with buffer_len bytes. Use EstimatedDataEncodedSize() + /// to size buffer. + virtual int WriteIndices(uint8_t* buffer, int buffer_len) = 0; + + virtual int dict_encoded_size() = 0; + // virtual int dict_encoded_size() { return dict_encoded_size_; } - Encoding::type encoding() const { return encoding_; } + virtual int bit_width() const = 0; - protected: - explicit Encoder(const ColumnDescriptor* descr, Encoding::type encoding, - ::arrow::MemoryPool* pool) - : descr_(descr), encoding_(encoding), pool_(pool) {} + /// Writes out the encoded dictionary to buffer. buffer must be preallocated to + /// dict_encoded_size() bytes. 
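/// Typical flush sequence (illustrative, mirroring the updated
/// encoding-test.cc in this patch): allocate dict_encoded_size() bytes, call
/// WriteDict() to fill them, then obtain the RLE-encoded index page via
/// FlushValues().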
+ virtual void WriteDict(uint8_t* buffer) = 0; - // For accessing type-specific metadata, like FIXED_LEN_BYTE_ARRAY - const ColumnDescriptor* descr_; - const Encoding::type encoding_; - ::arrow::MemoryPool* pool_; + virtual int num_entries() const = 0; }; -// The Decoder template is parameterized on parquet::DataType subclasses -template +// ---------------------------------------------------------------------- +// Value decoding + class Decoder { public: - typedef typename DType::c_type T; - - virtual ~Decoder() {} + virtual ~Decoder() = default; // Sets the data for a new page. This will be called multiple times on the same // decoder and should reset all internal state. virtual void SetData(int num_values, const uint8_t* data, int len) = 0; + // Returns the number of values left (for the last call to SetData()). This is + // the number of values left in this page. + virtual int values_left() const = 0; + virtual Encoding::type encoding() const = 0; +}; + +template +class TypedDecoder : virtual public Decoder { + public: + using T = typename DType::c_type; + // Subclasses should override the ones they support. In each of these functions, // the decoder would decode put to 'max_values', storing the result in 'buffer'. // The function returns the number of values decoded, which should be max_values @@ -117,7 +154,8 @@ class Decoder { // Depending on the number of nulls, some of the value slots in buffer may // be uninitialized, and this will cause valgrind warnings / potentially UB - memset(buffer + values_read, 0, (num_values - values_read) * sizeof(T)); + memset(static_cast(buffer + values_read), 0, + (num_values - values_read) * sizeof(T)); // Add spacing for null entries. As we have filled the buffer from the front, // we need to add the spacing from the back. @@ -129,24 +167,166 @@ class Decoder { } return num_values; } +}; - // Returns the number of values left (for the last call to SetData()). This is - // the number of values left in this page. 
- int values_left() const { return num_values_; } +template +class DictDecoder : virtual public TypedDecoder { + public: + virtual void SetDict(TypedDecoder* dictionary) = 0; +}; - Encoding::type encoding() const { return encoding_; } +// ---------------------------------------------------------------------- +// TypedEncoder specializations, traits, and factory functions - protected: - explicit Decoder(const ColumnDescriptor* descr, Encoding::type encoding) - : descr_(descr), encoding_(encoding), num_values_(0) {} +class BooleanEncoder : virtual public TypedEncoder { + public: + using TypedEncoder::Put; + virtual void Put(const std::vector& src, int num_values) = 0; +}; - // For accessing type-specific metadata, like FIXED_LEN_BYTE_ARRAY - const ColumnDescriptor* descr_; +using Int32Encoder = TypedEncoder; +using Int64Encoder = TypedEncoder; +using Int96Encoder = TypedEncoder; +using FloatEncoder = TypedEncoder; +using DoubleEncoder = TypedEncoder; +class ByteArrayEncoder : virtual public TypedEncoder {}; +class FLBAEncoder : virtual public TypedEncoder {}; - const Encoding::type encoding_; - int num_values_; +class BooleanDecoder : virtual public TypedDecoder { + public: + using TypedDecoder::Decode; + virtual int Decode(uint8_t* buffer, int max_values) = 0; }; -} // namespace parquet +using Int32Decoder = TypedDecoder; +using Int64Decoder = TypedDecoder; +using Int96Decoder = TypedDecoder; +using FloatDecoder = TypedDecoder; +using DoubleDecoder = TypedDecoder; + +class ByteArrayDecoder : virtual public TypedDecoder { + public: + using TypedDecoder::DecodeSpaced; + virtual int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits, + int64_t valid_bits_offset, + ::arrow::internal::ChunkedBinaryBuilder* builder) = 0; + + virtual int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits, + int64_t valid_bits_offset, + ::arrow::BinaryDictionaryBuilder* builder) = 0; + + // TODO(wesm): Implement DecodeArrowNonNull as part of ARROW-3325 + // See also ARROW-3772, ARROW-3769 + virtual int DecodeArrowNonNull(int num_values, + ::arrow::internal::ChunkedBinaryBuilder* builder) = 0; +}; + +class FLBADecoder : virtual public TypedDecoder { + public: + using TypedDecoder::DecodeSpaced; + + // TODO(wesm): As possible follow-up to PARQUET-1508, we should examine if + // there is value in adding specialized read methods for + // FIXED_LEN_BYTE_ARRAY. 
If only Decimal data can occur with this data type + // then perhaps not +}; + +template +struct EncodingTraits {}; + +template <> +struct EncodingTraits { + using Encoder = BooleanEncoder; + using Decoder = BooleanDecoder; +}; + +template <> +struct EncodingTraits { + using Encoder = Int32Encoder; + using Decoder = Int32Decoder; +}; + +template <> +struct EncodingTraits { + using Encoder = Int64Encoder; + using Decoder = Int64Decoder; +}; + +template <> +struct EncodingTraits { + using Encoder = Int96Encoder; + using Decoder = Int96Decoder; +}; -#endif // PARQUET_ENCODING_H +template <> +struct EncodingTraits { + using Encoder = FloatEncoder; + using Decoder = FloatDecoder; +}; + +template <> +struct EncodingTraits { + using Encoder = DoubleEncoder; + using Decoder = DoubleDecoder; +}; + +template <> +struct EncodingTraits { + using Encoder = ByteArrayEncoder; + using Decoder = ByteArrayDecoder; +}; + +template <> +struct EncodingTraits { + using Encoder = FLBAEncoder; + using Decoder = FLBADecoder; +}; + +PARQUET_EXPORT +std::unique_ptr MakeEncoder( + Type::type type_num, Encoding::type encoding, bool use_dictionary = false, + const ColumnDescriptor* descr = NULLPTR, + ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()); + +template +std::unique_ptr::Encoder> MakeTypedEncoder( + Encoding::type encoding, bool use_dictionary = false, + const ColumnDescriptor* descr = NULLPTR, + ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) { + using OutType = typename EncodingTraits::Encoder; + std::unique_ptr base = + MakeEncoder(DType::type_num, encoding, use_dictionary, descr, pool); + return std::unique_ptr(dynamic_cast(base.release())); +} + +PARQUET_EXPORT +std::unique_ptr MakeDecoder(Type::type type_num, Encoding::type encoding, + const ColumnDescriptor* descr = NULLPTR); + +namespace detail { + +PARQUET_EXPORT +std::unique_ptr MakeDictDecoder(Type::type type_num, + const ColumnDescriptor* descr, + ::arrow::MemoryPool* pool); + +} // namespace detail + +template +std::unique_ptr> MakeDictDecoder( + const ColumnDescriptor* descr, + ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) { + using OutType = DictDecoder; + auto decoder = detail::MakeDictDecoder(DType::type_num, descr, pool); + return std::unique_ptr(dynamic_cast(decoder.release())); +} + +template +std::unique_ptr::Decoder> MakeTypedDecoder( + Encoding::type encoding, const ColumnDescriptor* descr = NULLPTR) { + using OutType = typename EncodingTraits::Decoder; + std::unique_ptr base = MakeDecoder(DType::type_num, encoding, descr); + return std::unique_ptr(dynamic_cast(base.release())); +} + +} // namespace parquet diff --git a/cpp/src/parquet/file-deserialize-test.cc b/cpp/src/parquet/file-deserialize-test.cc index 17dfe387fd6e0..e62968e5d5dc9 100644 --- a/cpp/src/parquet/file-deserialize-test.cc +++ b/cpp/src/parquet/file-deserialize-test.cc @@ -22,6 +22,7 @@ #include #include "parquet/column_page.h" +#include "parquet/column_reader.h" #include "parquet/exception.h" #include "parquet/file_reader.h" #include "parquet/thrift.h" @@ -85,8 +86,8 @@ class TestPageSerde : public ::testing::Test { page_header_.compressed_page_size = compressed_size; page_header_.type = format::PageType::DATA_PAGE; - ASSERT_NO_THROW( - SerializeThriftMsg(&page_header_, max_serialized_len, out_stream_.get())); + ThriftSerializer serializer; + ASSERT_NO_THROW(serializer.Serialize(&page_header_, out_stream_.get())); } void ResetStream() { out_stream_.reset(new InMemoryOutputStream); } @@ -176,9 +177,11 @@ TEST_F(TestPageSerde, 
TestFailLargePageHeaders) { } TEST_F(TestPageSerde, Compression) { - Compression::type codec_types[5] = {Compression::GZIP, Compression::SNAPPY, - Compression::BROTLI, Compression::LZ4, - Compression::ZSTD}; + std::vector codec_types = {Compression::GZIP, Compression::SNAPPY, + Compression::BROTLI, Compression::LZ4}; +#ifdef ARROW_WITH_ZSTD + codec_types.push_back(Compression::ZSTD); +#endif const int32_t num_rows = 32; // dummy value data_page_header_.num_values = num_rows; diff --git a/cpp/src/parquet/file-serialize-test.cc b/cpp/src/parquet/file-serialize-test.cc index 750faa20e2454..88dd657603184 100644 --- a/cpp/src/parquet/file-serialize-test.cc +++ b/cpp/src/parquet/file-serialize-test.cc @@ -301,9 +301,11 @@ TYPED_TEST(TestSerialize, SmallFileLz4) { ASSERT_NO_FATAL_FAILURE(this->FileSerializeTest(Compression::LZ4)); } +#ifdef ARROW_WITH_ZSTD TYPED_TEST(TestSerialize, SmallFileZstd) { ASSERT_NO_FATAL_FAILURE(this->FileSerializeTest(Compression::ZSTD)); } +#endif } // namespace test diff --git a/cpp/src/parquet/file_reader.cc b/cpp/src/parquet/file_reader.cc index 5be1a86234f3b..0f8e35904c606 100644 --- a/cpp/src/parquet/file_reader.cc +++ b/cpp/src/parquet/file_reader.cc @@ -19,23 +19,22 @@ #include #include -#include +#include #include -#include #include #include -#include +#include "arrow/buffer.h" #include "arrow/io/file.h" +#include "arrow/status.h" #include "arrow/util/logging.h" -#include "parquet/column_page.h" #include "parquet/column_reader.h" #include "parquet/column_scanner.h" #include "parquet/exception.h" #include "parquet/metadata.h" #include "parquet/properties.h" -#include "parquet/thrift.h" +#include "parquet/schema.h" #include "parquet/types.h" #include "parquet/util/memory.h" diff --git a/cpp/src/parquet/file_reader.h b/cpp/src/parquet/file_reader.h index 4730305c93131..2d1cc9221f377 100644 --- a/cpp/src/parquet/file_reader.h +++ b/cpp/src/parquet/file_reader.h @@ -19,24 +19,24 @@ #define PARQUET_FILE_READER_H #include -#include -#include #include #include #include -#include "parquet/column_reader.h" -#include "parquet/metadata.h" +#include "arrow/io/interfaces.h" +#include "arrow/util/macros.h" + +#include "parquet/metadata.h" // IWYU pragma:: keep #include "parquet/properties.h" -#include "parquet/schema.h" -#include "parquet/statistics.h" -#include "parquet/util/macros.h" -#include "parquet/util/memory.h" #include "parquet/util/visibility.h" namespace parquet { class ColumnReader; +class FileMetaData; +class PageReader; +class RandomAccessSource; +class RowGroupMetaData; class PARQUET_EXPORT RowGroupReader { public: diff --git a/cpp/src/parquet/file_writer.cc b/cpp/src/parquet/file_writer.cc index 01fa112fe37ef..51f0cb43b7eea 100644 --- a/cpp/src/parquet/file_writer.cc +++ b/cpp/src/parquet/file_writer.cc @@ -21,15 +21,12 @@ #include #include "parquet/column_writer.h" -#include "parquet/schema-internal.h" #include "parquet/schema.h" -#include "parquet/thrift.h" #include "parquet/util/memory.h" using arrow::MemoryPool; using parquet::schema::GroupNode; -using parquet::schema::SchemaFlattener; namespace parquet { @@ -251,6 +248,9 @@ class FileSerializer : public ParquetFileWriter::Contents { void Close() override { if (is_open_) { + // If any functions here raise an exception, we set is_open_ to be false + // so that this does not get called again (possibly causing segfault) + is_open_ = false; if (row_group_writer_) { num_rows_ += row_group_writer_->num_rows(); row_group_writer_->Close(); @@ -262,7 +262,6 @@ class FileSerializer : public 
ParquetFileWriter::Contents { WriteFileMetaData(*metadata, sink_.get()); sink_->Close(); - is_open_ = false; } } diff --git a/cpp/src/parquet/file_writer.h b/cpp/src/parquet/file_writer.h index 82703f82dc899..860500f3bfe14 100644 --- a/cpp/src/parquet/file_writer.h +++ b/cpp/src/parquet/file_writer.h @@ -20,25 +20,31 @@ #include #include +#include +#include "arrow/util/macros.h" + +#include "parquet/exception.h" #include "parquet/metadata.h" #include "parquet/properties.h" #include "parquet/schema.h" -#include "parquet/util/macros.h" -#include "parquet/util/memory.h" #include "parquet/util/visibility.h" -namespace parquet { +namespace arrow { + +class MemoryPool; + +namespace io { -class ColumnWriter; -class PageWriter; class OutputStream; -namespace schema { +} // namespace io +} // namespace arrow -class GroupNode; +namespace parquet { -} // namespace schema +class ColumnWriter; +class OutputStream; class PARQUET_EXPORT RowGroupWriter { public: diff --git a/cpp/src/parquet/hasher.h b/cpp/src/parquet/hasher.h index dc316a0377cba..233262ebdd647 100644 --- a/cpp/src/parquet/hasher.h +++ b/cpp/src/parquet/hasher.h @@ -63,8 +63,8 @@ class Hasher { /// Compute hash for fixed byte array value by using its plain encoding result. /// - /// @param value the value to hash. - /// @return hash result. + /// @param value the value address. + /// @param len the value length. virtual uint64_t Hash(const FLBA* value, uint32_t len) const = 0; virtual ~Hasher() = default; diff --git a/cpp/src/parquet/metadata-test.cc b/cpp/src/parquet/metadata-test.cc index bcf911eab8b26..826ac4d6a504f 100644 --- a/cpp/src/parquet/metadata-test.cc +++ b/cpp/src/parquet/metadata-test.cc @@ -59,7 +59,6 @@ TEST(Metadata, TestBuildAccess) { auto f_builder = FileMetaDataBuilder::Make(&schema, props); auto rg1_builder = f_builder->AppendRowGroup(); - auto rg2_builder = f_builder->AppendRowGroup(); // Write the metadata // rowgroup1 metadata @@ -75,6 +74,7 @@ TEST(Metadata, TestBuildAccess) { rg1_builder->Finish(1024); // rowgroup2 metadata + auto rg2_builder = f_builder->AppendRowGroup(); col1_builder = rg2_builder->NextColumnChunk(); col2_builder = rg2_builder->NextColumnChunk(); // column metadata diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc index cf63b0f662b52..93c2073e898ba 100644 --- a/cpp/src/parquet/metadata.cc +++ b/cpp/src/parquet/metadata.cc @@ -16,22 +16,25 @@ // under the License. 
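The FileSerializer::Close() change above flips is_open_ before any step that can throw, so a failed Close() cannot rerun its teardown on a second call. A minimal sketch of that close-once pattern, with illustrative names not taken from this patch:

// Sketch of the close-once pattern: clear the open flag *before* work that
// may throw, so a repeated Close() (e.g. from a destructor after an earlier
// exception) becomes a no-op instead of touching already-released state.
#include <stdexcept>

class Sink {
 public:
  void Close() {
    if (is_open_) {
      is_open_ = false;    // set first: a throw below must not allow re-entry
      FlushPendingData();  // may throw
    }
  }
  ~Sink() {
    try {
      Close();  // safe even if an earlier Close() already failed
    } catch (const std::exception&) {
      // swallow: never propagate from a destructor
    }
  }

 private:
  void FlushPendingData() { /* may throw std::runtime_error */ }
  bool is_open_ = true;
};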
#include +#include #include #include -#include + +#include "arrow/util/logging.h" #include "parquet/exception.h" #include "parquet/metadata.h" #include "parquet/schema-internal.h" #include "parquet/schema.h" +#include "parquet/statistics.h" #include "parquet/thrift.h" -#include "parquet/util/memory.h" -#include -#include +#include // IWYU pragma: keep namespace parquet { +class OutputStream; + const ApplicationVersion& ApplicationVersion::PARQUET_251_FIXED_VERSION() { static ApplicationVersion version("parquet-mr", 1, 8, 0); return version; @@ -47,6 +50,23 @@ const ApplicationVersion& ApplicationVersion::PARQUET_CPP_FIXED_STATS_VERSION() return version; } +const ApplicationVersion& ApplicationVersion::PARQUET_MR_FIXED_STATS_VERSION() { + static ApplicationVersion version("parquet-mr", 1, 10, 0); + return version; +} + +std::string ParquetVersionToString(ParquetVersion::type ver) { + switch (ver) { + case ParquetVersion::PARQUET_1_0: + return "1.0"; + case ParquetVersion::PARQUET_2_0: + return "2.0"; + } + + // This should be unreachable + return "UNKNOWN"; +} + template static std::shared_ptr MakeTypedColumnStats( const format::ColumnMetaData& metadata, const ColumnDescriptor* descr) { @@ -103,7 +123,6 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl { } possible_stats_ = nullptr; } - ~ColumnChunkMetaDataImpl() {} // column chunk inline int64_t file_offset() const { return column_->file_offset; } @@ -185,13 +204,13 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl { }; std::unique_ptr ColumnChunkMetaData::Make( - const uint8_t* metadata, const ColumnDescriptor* descr, + const void* metadata, const ColumnDescriptor* descr, const ApplicationVersion* writer_version) { return std::unique_ptr( new ColumnChunkMetaData(metadata, descr, writer_version)); } -ColumnChunkMetaData::ColumnChunkMetaData(const uint8_t* metadata, +ColumnChunkMetaData::ColumnChunkMetaData(const void* metadata, const ColumnDescriptor* descr, const ApplicationVersion* writer_version) : impl_{std::unique_ptr(new ColumnChunkMetaDataImpl( @@ -260,7 +279,6 @@ class RowGroupMetaData::RowGroupMetaDataImpl { const SchemaDescriptor* schema, const ApplicationVersion* writer_version) : row_group_(row_group), schema_(schema), writer_version_(writer_version) {} - ~RowGroupMetaDataImpl() {} inline int num_columns() const { return static_cast(row_group_->columns.size()); } @@ -277,9 +295,8 @@ class RowGroupMetaData::RowGroupMetaDataImpl { << " columns, requested metadata for column: " << i; throw ParquetException(ss.str()); } - return ColumnChunkMetaData::Make( - reinterpret_cast(&row_group_->columns[i]), schema_->Column(i), - writer_version_); + return ColumnChunkMetaData::Make(&row_group_->columns[i], schema_->Column(i), + writer_version_); } private: @@ -289,14 +306,13 @@ class RowGroupMetaData::RowGroupMetaDataImpl { }; std::unique_ptr RowGroupMetaData::Make( - const uint8_t* metadata, const SchemaDescriptor* schema, + const void* metadata, const SchemaDescriptor* schema, const ApplicationVersion* writer_version) { return std::unique_ptr( new RowGroupMetaData(metadata, schema, writer_version)); } -RowGroupMetaData::RowGroupMetaData(const uint8_t* metadata, - const SchemaDescriptor* schema, +RowGroupMetaData::RowGroupMetaData(const void* metadata, const SchemaDescriptor* schema, const ApplicationVersion* writer_version) : impl_{std::unique_ptr(new RowGroupMetaDataImpl( reinterpret_cast(metadata), schema, writer_version))} { @@ -320,10 +336,11 @@ class FileMetaData::FileMetaDataImpl { public: FileMetaDataImpl() : 
metadata_len_(0) {} - explicit FileMetaDataImpl(const uint8_t* metadata, uint32_t* metadata_len) + explicit FileMetaDataImpl(const void* metadata, uint32_t* metadata_len) : metadata_len_(0) { metadata_.reset(new format::FileMetaData); - DeserializeThriftMsg(metadata, metadata_len, metadata_.get()); + DeserializeThriftMsg(reinterpret_cast(metadata), metadata_len, + metadata_.get()); metadata_len_ = *metadata_len; if (metadata_->__isset.created_by) { @@ -336,7 +353,6 @@ class FileMetaData::FileMetaDataImpl { InitColumnOrders(); InitKeyValueMetadata(); } - ~FileMetaDataImpl() {} inline uint32_t size() const { return metadata_len_; } inline int num_columns() const { return schema_.num_columns(); } @@ -353,7 +369,8 @@ class FileMetaData::FileMetaDataImpl { const ApplicationVersion& writer_version() const { return writer_version_; } void WriteTo(OutputStream* dst) const { - SerializeThriftMsg(metadata_.get(), 1024, dst); + ThriftSerializer serializer; + serializer.Serialize(metadata_.get(), dst); } std::unique_ptr RowGroup(int i) { @@ -363,9 +380,7 @@ class FileMetaData::FileMetaDataImpl { << " row groups, requested metadata for row group: " << i; throw ParquetException(ss.str()); } - return RowGroupMetaData::Make( - reinterpret_cast(&metadata_->row_groups[i]), &schema_, - &writer_version_); + return RowGroupMetaData::Make(&metadata_->row_groups[i], &schema_, &writer_version_); } const SchemaDescriptor* schema() const { return &schema_; } @@ -417,13 +432,13 @@ class FileMetaData::FileMetaDataImpl { std::shared_ptr key_value_metadata_; }; -std::shared_ptr FileMetaData::Make(const uint8_t* metadata, +std::shared_ptr FileMetaData::Make(const void* metadata, uint32_t* metadata_len) { // This FileMetaData ctor is private, not compatible with std::make_shared return std::shared_ptr(new FileMetaData(metadata, metadata_len)); } -FileMetaData::FileMetaData(const uint8_t* metadata, uint32_t* metadata_len) +FileMetaData::FileMetaData(const void* metadata, uint32_t* metadata_len) : impl_{std::unique_ptr( new FileMetaDataImpl(metadata, metadata_len))} {} @@ -540,8 +555,10 @@ bool ApplicationVersion::VersionEq(const ApplicationVersion& other_version) cons bool ApplicationVersion::HasCorrectStatistics(Type::type col_type, EncodedStatistics& statistics, SortOrder::type sort_order) const { - // Parquet cpp version 1.3.0 onwards stats are computed correctly for all types - if ((application_ != "parquet-cpp") || (VersionLt(PARQUET_CPP_FIXED_STATS_VERSION()))) { + // parquet-cpp version 1.3.0 and parquet-mr 1.10.0 onwards stats are computed + // correctly for all types + if ((application_ == "parquet-cpp" && VersionLt(PARQUET_CPP_FIXED_STATS_VERSION())) || + (application_ == "parquet-mr" && VersionLt(PARQUET_MR_FIXED_STATS_VERSION()))) { // Only SIGNED are valid unless max and min are the same // (in which case the sort order does not matter) bool max_equals_min = statistics.has_min && statistics.has_max @@ -594,11 +611,7 @@ class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl { Init(column_chunk); } - ~ColumnChunkMetaDataBuilderImpl() {} - - const uint8_t* contents() const { - return reinterpret_cast(column_chunk_); - } + const void* contents() const { return column_chunk_; } // column chunk void set_file_path(const std::string& val) { column_chunk_->__set_file_path(val); } @@ -665,7 +678,8 @@ class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl { } void WriteTo(OutputStream* sink) { - SerializeThriftMsg(column_chunk_, sizeof(format::ColumnChunk), sink); + ThriftSerializer serializer; + 
serializer.Serialize(column_chunk_, sink); } const ColumnDescriptor* descr() const { return column_; } @@ -687,7 +701,7 @@ class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl { std::unique_ptr ColumnChunkMetaDataBuilder::Make( const std::shared_ptr& props, const ColumnDescriptor* column, - uint8_t* contents) { + void* contents) { return std::unique_ptr( new ColumnChunkMetaDataBuilder(props, column, contents)); } @@ -705,14 +719,14 @@ ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilder( ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilder( const std::shared_ptr& props, const ColumnDescriptor* column, - uint8_t* contents) + void* contents) : impl_{std::unique_ptr( new ColumnChunkMetaDataBuilderImpl( props, column, reinterpret_cast(contents)))} {} ColumnChunkMetaDataBuilder::~ColumnChunkMetaDataBuilder() {} -const uint8_t* ColumnChunkMetaDataBuilder::contents() const { return impl_->contents(); } +const void* ColumnChunkMetaDataBuilder::contents() const { return impl_->contents(); } void ColumnChunkMetaDataBuilder::set_file_path(const std::string& path) { impl_->set_file_path(path); @@ -742,12 +756,11 @@ void ColumnChunkMetaDataBuilder::SetStatistics(bool is_signed, class RowGroupMetaDataBuilder::RowGroupMetaDataBuilderImpl { public: explicit RowGroupMetaDataBuilderImpl(const std::shared_ptr& props, - const SchemaDescriptor* schema, uint8_t* contents) + const SchemaDescriptor* schema, void* contents) : properties_(props), schema_(schema), current_column_(0) { row_group_ = reinterpret_cast(contents); InitializeColumns(schema->num_columns()); } - ~RowGroupMetaDataBuilderImpl() {} ColumnChunkMetaDataBuilder* NextColumnChunk() { if (!(current_column_ < num_columns())) { @@ -758,8 +771,7 @@ class RowGroupMetaDataBuilder::RowGroupMetaDataBuilderImpl { } auto column = schema_->Column(current_column_); auto column_builder = ColumnChunkMetaDataBuilder::Make( - properties_, column, - reinterpret_cast(&row_group_->columns[current_column_++])); + properties_, column, &row_group_->columns[current_column_++]); auto column_builder_ptr = column_builder.get(); column_builders_.push_back(std::move(column_builder)); return column_builder_ptr; @@ -808,14 +820,14 @@ class RowGroupMetaDataBuilder::RowGroupMetaDataBuilderImpl { std::unique_ptr RowGroupMetaDataBuilder::Make( const std::shared_ptr& props, const SchemaDescriptor* schema_, - uint8_t* contents) { + void* contents) { return std::unique_ptr( new RowGroupMetaDataBuilder(props, schema_, contents)); } RowGroupMetaDataBuilder::RowGroupMetaDataBuilder( const std::shared_ptr& props, const SchemaDescriptor* schema_, - uint8_t* contents) + void* contents) : impl_{std::unique_ptr( new RowGroupMetaDataBuilderImpl(props, schema_, contents))} {} @@ -849,29 +861,21 @@ class FileMetaDataBuilder::FileMetaDataBuilderImpl { : properties_(props), schema_(schema), key_value_metadata_(key_value_metadata) { metadata_.reset(new format::FileMetaData()); } - ~FileMetaDataBuilderImpl() {} RowGroupMetaDataBuilder* AppendRowGroup() { - auto row_group = std::unique_ptr(new format::RowGroup()); - auto row_group_builder = RowGroupMetaDataBuilder::Make( - properties_, schema_, reinterpret_cast(row_group.get())); - RowGroupMetaDataBuilder* row_group_ptr = row_group_builder.get(); - row_group_builders_.push_back(std::move(row_group_builder)); - row_groups_.push_back(std::move(row_group)); - return row_group_ptr; + row_groups_.emplace_back(); + current_row_group_builder_ = + RowGroupMetaDataBuilder::Make(properties_, schema_, &row_groups_.back()); + return 
current_row_group_builder_.get(); } std::unique_ptr Finish() { int64_t total_rows = 0; - std::vector row_groups; - for (auto row_group = row_groups_.begin(); row_group != row_groups_.end(); - row_group++) { - auto rowgroup = *((*row_group).get()); - row_groups.push_back(rowgroup); - total_rows += rowgroup.num_rows; + for (auto row_group : row_groups_) { + total_rows += row_group.num_rows; } metadata_->__set_num_rows(total_rows); - metadata_->__set_row_groups(row_groups); + metadata_->__set_row_groups(row_groups_); if (key_value_metadata_) { metadata_->key_value_metadata.clear(); @@ -926,8 +930,9 @@ class FileMetaDataBuilder::FileMetaDataBuilderImpl { private: const std::shared_ptr properties_; - std::vector> row_groups_; - std::vector> row_group_builders_; + std::vector row_groups_; + + std::unique_ptr current_row_group_builder_; const SchemaDescriptor* schema_; std::shared_ptr key_value_metadata_; }; diff --git a/cpp/src/parquet/metadata.h b/cpp/src/parquet/metadata.h index 706e980711683..4ccf14be1fdd5 100644 --- a/cpp/src/parquet/metadata.h +++ b/cpp/src/parquet/metadata.h @@ -18,23 +18,32 @@ #ifndef PARQUET_FILE_METADATA_H #define PARQUET_FILE_METADATA_H +#include #include -#include #include #include #include "arrow/util/key_value_metadata.h" +#include "arrow/util/macros.h" #include "parquet/properties.h" -#include "parquet/schema.h" -#include "parquet/statistics.h" #include "parquet/types.h" -#include "parquet/util/macros.h" -#include "parquet/util/memory.h" #include "parquet/util/visibility.h" namespace parquet { +class ColumnDescriptor; +class EncodedStatistics; +class OutputStream; +class RowGroupStatistics; +class SchemaDescriptor; + +namespace schema { + +class ColumnPath; + +} // namespace schema + using KeyValueMetadata = ::arrow::KeyValueMetadata; class PARQUET_EXPORT ApplicationVersion { @@ -43,6 +52,7 @@ class PARQUET_EXPORT ApplicationVersion { static const ApplicationVersion& PARQUET_251_FIXED_VERSION(); static const ApplicationVersion& PARQUET_816_FIXED_VERSION(); static const ApplicationVersion& PARQUET_CPP_FIXED_STATS_VERSION(); + static const ApplicationVersion& PARQUET_MR_FIXED_STATS_VERSION(); // Regular expression for the version format // major . minor . 
patch unknown - prerelease.x + build info // Eg: 1.5.0ab-cdh5.5.0+cd @@ -93,7 +103,7 @@ class PARQUET_EXPORT ColumnChunkMetaData { public: // API convenience to get a MetaData accessor static std::unique_ptr Make( - const uint8_t* metadata, const ColumnDescriptor* descr, + const void* metadata, const ColumnDescriptor* descr, const ApplicationVersion* writer_version = NULLPTR); ~ColumnChunkMetaData(); @@ -119,7 +129,7 @@ class PARQUET_EXPORT ColumnChunkMetaData { int64_t total_uncompressed_size() const; private: - explicit ColumnChunkMetaData(const uint8_t* metadata, const ColumnDescriptor* descr, + explicit ColumnChunkMetaData(const void* metadata, const ColumnDescriptor* descr, const ApplicationVersion* writer_version = NULLPTR); // PIMPL Idiom class ColumnChunkMetaDataImpl; @@ -130,7 +140,7 @@ class PARQUET_EXPORT RowGroupMetaData { public: // API convenience to get a MetaData accessor static std::unique_ptr Make( - const uint8_t* metadata, const SchemaDescriptor* schema, + const void* metadata, const SchemaDescriptor* schema, const ApplicationVersion* writer_version = NULLPTR); ~RowGroupMetaData(); @@ -144,7 +154,7 @@ class PARQUET_EXPORT RowGroupMetaData { std::unique_ptr ColumnChunk(int i) const; private: - explicit RowGroupMetaData(const uint8_t* metadata, const SchemaDescriptor* schema, + explicit RowGroupMetaData(const void* metadata, const SchemaDescriptor* schema, const ApplicationVersion* writer_version = NULLPTR); // PIMPL Idiom class RowGroupMetaDataImpl; @@ -156,7 +166,7 @@ class FileMetaDataBuilder; class PARQUET_EXPORT FileMetaData { public: // API convenience to get a MetaData accessor - static std::shared_ptr Make(const uint8_t* serialized_metadata, + static std::shared_ptr Make(const void* serialized_metadata, uint32_t* metadata_len); ~FileMetaData(); @@ -182,7 +192,7 @@ class PARQUET_EXPORT FileMetaData { private: friend FileMetaDataBuilder; - explicit FileMetaData(const uint8_t* serialized_metadata, uint32_t* metadata_len); + explicit FileMetaData(const void* serialized_metadata, uint32_t* metadata_len); // PIMPL Idiom FileMetaData(); @@ -199,7 +209,7 @@ class PARQUET_EXPORT ColumnChunkMetaDataBuilder { static std::unique_ptr Make( const std::shared_ptr& props, const ColumnDescriptor* column, - uint8_t* contents); + void* contents); ~ColumnChunkMetaDataBuilder(); @@ -217,7 +227,7 @@ class PARQUET_EXPORT ColumnChunkMetaDataBuilder { bool dictionary_fallback); // The metadata contents, suitable for passing to ColumnChunkMetaData::Make - const uint8_t* contents() const; + const void* contents() const; // For writing metadata at end of column chunk void WriteTo(OutputStream* sink); @@ -226,7 +236,7 @@ class PARQUET_EXPORT ColumnChunkMetaDataBuilder { explicit ColumnChunkMetaDataBuilder(const std::shared_ptr& props, const ColumnDescriptor* column); explicit ColumnChunkMetaDataBuilder(const std::shared_ptr& props, - const ColumnDescriptor* column, uint8_t* contents); + const ColumnDescriptor* column, void* contents); // PIMPL Idiom class ColumnChunkMetaDataBuilderImpl; std::unique_ptr impl_; @@ -237,7 +247,7 @@ class PARQUET_EXPORT RowGroupMetaDataBuilder { // API convenience to get a MetaData reader static std::unique_ptr Make( const std::shared_ptr& props, const SchemaDescriptor* schema_, - uint8_t* contents); + void* contents); ~RowGroupMetaDataBuilder(); @@ -253,7 +263,7 @@ class PARQUET_EXPORT RowGroupMetaDataBuilder { private: explicit RowGroupMetaDataBuilder(const std::shared_ptr& props, - const SchemaDescriptor* schema_, uint8_t* contents); + const SchemaDescriptor* 
schema_, void* contents); // PIMPL Idiom class RowGroupMetaDataBuilderImpl; std::unique_ptr impl_; @@ -268,9 +278,10 @@ class PARQUET_EXPORT FileMetaDataBuilder { ~FileMetaDataBuilder(); + // The prior RowGroupMetaDataBuilder (if any) is destroyed RowGroupMetaDataBuilder* AppendRowGroup(); - // commit the metadata + // Complete the Thrift structure std::unique_ptr Finish(); private: @@ -282,6 +293,8 @@ class PARQUET_EXPORT FileMetaDataBuilder { std::unique_ptr impl_; }; +PARQUET_EXPORT std::string ParquetVersionToString(ParquetVersion::type ver); + } // namespace parquet #endif // PARQUET_FILE_METADATA_H diff --git a/cpp/src/parquet/printer.cc b/cpp/src/parquet/printer.cc index 9f26a4180cda1..61d669bcb34d6 100644 --- a/cpp/src/parquet/printer.cc +++ b/cpp/src/parquet/printer.cc @@ -17,15 +17,30 @@ #include "parquet/printer.h" +#include +#include +#include +#include #include #include +#include "arrow/util/key_value_metadata.h" + #include "parquet/column_scanner.h" +#include "parquet/exception.h" +#include "parquet/file_reader.h" +#include "parquet/metadata.h" +#include "parquet/schema.h" +#include "parquet/statistics.h" +#include "parquet/types.h" using std::string; using std::vector; namespace parquet { + +class ColumnReader; + // ---------------------------------------------------------------------- // ParquetFilePrinter::DebugPrint @@ -38,7 +53,7 @@ void ParquetFilePrinter::DebugPrint(std::ostream& stream, std::list selecte const FileMetaData* file_metadata = fileReader->metadata().get(); stream << "File Name: " << filename << "\n"; - stream << "Version: " << file_metadata->version() << "\n"; + stream << "Version: " << ParquetVersionToString(file_metadata->version()) << "\n"; stream << "Created By: " << file_metadata->created_by() << "\n"; stream << "Total rows: " << file_metadata->num_rows() << "\n"; diff --git a/cpp/src/parquet/printer.h b/cpp/src/parquet/printer.h index 1113c3fecd25b..4591e7abad058 100644 --- a/cpp/src/parquet/printer.h +++ b/cpp/src/parquet/printer.h @@ -18,17 +18,15 @@ #ifndef PARQUET_FILE_PRINTER_H #define PARQUET_FILE_PRINTER_H -#include #include #include -#include -#include -#include -#include "parquet/file_reader.h" +#include "parquet/util/visibility.h" namespace parquet { +class ParquetFileReader; + class PARQUET_EXPORT ParquetFilePrinter { private: ParquetFileReader* fileReader; diff --git a/cpp/src/parquet/reader-test.cc b/cpp/src/parquet/reader-test.cc index d628f4727c160..a0536b56a89ca 100644 --- a/cpp/src/parquet/reader-test.cc +++ b/cpp/src/parquet/reader-test.cc @@ -28,6 +28,7 @@ #include "parquet/column_reader.h" #include "parquet/column_scanner.h" #include "parquet/file_reader.h" +#include "parquet/metadata.h" #include "parquet/printer.h" #include "parquet/util/memory.h" #include "parquet/util/test-common.h" diff --git a/cpp/src/parquet/schema.cc b/cpp/src/parquet/schema.cc index da004344f2016..431f30773b96d 100644 --- a/cpp/src/parquet/schema.cc +++ b/cpp/src/parquet/schema.cc @@ -19,11 +19,13 @@ #include "parquet/schema-internal.h" #include +#include #include -#include #include #include +#include "arrow/util/logging.h" + #include "parquet/exception.h" #include "parquet/thrift.h" diff --git a/cpp/src/parquet/schema.h b/cpp/src/parquet/schema.h index add2f6dbab013..76920c0e93b57 100644 --- a/cpp/src/parquet/schema.h +++ b/cpp/src/parquet/schema.h @@ -28,6 +28,8 @@ #include #include +#include "arrow/util/macros.h" + #include "parquet/types.h" #include "parquet/util/macros.h" #include "parquet/util/visibility.h" @@ -144,9 +146,7 @@ class PARQUET_EXPORT 
Node { const std::shared_ptr path() const; - // ToParquet returns an opaque void* to avoid exporting - // parquet::SchemaElement into the public API - virtual void ToParquet(void* opaque_element) const = 0; + virtual void ToParquet(void* element) const = 0; // Node::Visitor abstract class for walking schemas with the visitor pattern class Visitor { @@ -193,8 +193,6 @@ typedef std::vector NodeVector; // parameters) class PARQUET_EXPORT PrimitiveNode : public Node { public: - // FromParquet accepts an opaque void* to avoid exporting - // parquet::SchemaElement into the public API static std::unique_ptr FromParquet(const void* opaque_element, int id); static inline NodePtr Make(const std::string& name, Repetition::type repetition, @@ -217,7 +215,7 @@ class PARQUET_EXPORT PrimitiveNode : public Node { const DecimalMetadata& decimal_metadata() const { return decimal_metadata_; } - void ToParquet(void* opaque_element) const override; + void ToParquet(void* element) const override; void Visit(Visitor* visitor) override; void VisitConst(ConstVisitor* visitor) const override; @@ -250,8 +248,6 @@ class PARQUET_EXPORT PrimitiveNode : public Node { class PARQUET_EXPORT GroupNode : public Node { public: - // Like PrimitiveNode, GroupNode::FromParquet accepts an opaque void* to avoid exporting - // parquet::SchemaElement into the public API static std::unique_ptr FromParquet(const void* opaque_element, int id, const NodeVector& fields); @@ -273,7 +269,7 @@ class PARQUET_EXPORT GroupNode : public Node { int field_count() const { return static_cast(fields_.size()); } - void ToParquet(void* opaque_element) const override; + void ToParquet(void* element) const override; void Visit(Visitor* visitor) override; void VisitConst(ConstVisitor* visitor) const override; diff --git a/cpp/src/parquet/statistics-test.cc b/cpp/src/parquet/statistics-test.cc index e1926a36b684c..ecdbaeb78d83d 100644 --- a/cpp/src/parquet/statistics-test.cc +++ b/cpp/src/parquet/statistics-test.cc @@ -772,5 +772,33 @@ TEST(TestStatisticsDoubleNaN, NaNValues) { ASSERT_EQ(min, -3.0); ASSERT_EQ(max, 4.0); } + +// Test statistics for binary column with UNSIGNED sort order +TEST(TestStatisticsMinMax, Unsigned) { + std::string dir_string(test::get_data_dir()); + std::stringstream ss; + ss << dir_string << "/binary.parquet"; + auto path = ss.str(); + + // The file is generated by parquet-mr 1.10.0, the first version that + // supports correct statistics for binary data (see PARQUET-1025). It + // contains a single column of binary type. Data is just single byte values + // from 0x00 to 0x0B. + auto file_reader = ParquetFileReader::OpenFile(path); + auto rg_reader = file_reader->RowGroup(0); + auto metadata = rg_reader->metadata(); + auto column_schema = metadata->schema()->Column(0); + ASSERT_EQ(SortOrder::UNSIGNED, column_schema->sort_order()); + + auto column_chunk = metadata->ColumnChunk(0); + ASSERT_TRUE(column_chunk->is_stats_set()); + + std::shared_ptr stats = column_chunk->statistics(); + ASSERT_TRUE(stats != NULL); + ASSERT_EQ(0, stats->null_count()); + ASSERT_EQ(12, stats->num_values()); + ASSERT_EQ(0x00, stats->EncodeMin()[0]); + ASSERT_EQ(0x0b, stats->EncodeMax()[0]); +} } // namespace test } // namespace parquet diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index ed4e8d05592e4..4cb2bfd92131d 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -16,10 +16,13 @@ // under the License. 
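The new TestStatisticsMinMax.Unsigned test above exercises the reader-side statistics API end to end. A usage sketch assembled from the same calls; the file path is a placeholder, and printing stands in for whatever a real reader would do with the stats:

// Check that chunk statistics exist and note the column's sort order before
// trusting min/max (uses only calls exercised by the test above).
#include <iostream>
#include <memory>
#include <string>

#include "parquet/file_reader.h"
#include "parquet/metadata.h"
#include "parquet/statistics.h"

void InspectFirstColumnStats(const std::string& path) {
  auto reader = parquet::ParquetFileReader::OpenFile(path);
  auto rg_reader = reader->RowGroup(0);
  auto rg_metadata = rg_reader->metadata();
  auto descr = rg_metadata->schema()->Column(0);
  auto chunk = rg_metadata->ColumnChunk(0);
  if (!chunk->is_stats_set()) {
    std::cout << "no usable statistics\n";
    return;
  }
  auto stats = chunk->statistics();
  std::cout << "sort order is unsigned: "
            << (descr->sort_order() == parquet::SortOrder::UNSIGNED)
            << ", null_count=" << stats->null_count()
            << ", num_values=" << stats->num_values() << "\n";
}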
#include +#include #include #include -#include "parquet/encoding-internal.h" +#include "arrow/util/logging.h" + +#include "parquet/encoding.h" #include "parquet/exception.h" #include "parquet/statistics.h" #include "parquet/util/memory.h" @@ -296,19 +299,19 @@ EncodedStatistics TypedRowGroupStatistics::Encode() { template void TypedRowGroupStatistics::PlainEncode(const T& src, std::string* dst) { - PlainEncoder encoder(descr(), pool_); - encoder.Put(&src, 1); - auto buffer = encoder.FlushValues(); + auto encoder = MakeTypedEncoder(Encoding::PLAIN, false, descr(), pool_); + encoder->Put(&src, 1); + auto buffer = encoder->FlushValues(); auto ptr = reinterpret_cast(buffer->data()); dst->assign(ptr, buffer->size()); } template void TypedRowGroupStatistics::PlainDecode(const std::string& src, T* dst) { - PlainDecoder decoder(descr()); - decoder.SetData(1, reinterpret_cast(src.c_str()), - static_cast(src.size())); - decoder.Decode(dst, 1); + auto decoder = MakeTypedDecoder(Encoding::PLAIN, descr()); + decoder->SetData(1, reinterpret_cast(src.c_str()), + static_cast(src.size())); + decoder->Decode(dst, 1); } template <> diff --git a/cpp/src/parquet/test-specialization.h b/cpp/src/parquet/test-specialization.h index 3d88cfc9e3fb2..55d23748c5cea 100644 --- a/cpp/src/parquet/test-specialization.h +++ b/cpp/src/parquet/test-specialization.h @@ -19,8 +19,7 @@ // Parquet column chunk within a row group. It could be extended in the future // to iterate through all data pages in all chunks in a file. -#ifndef PARQUET_COLUMN_TEST_SPECIALIZATION_H -#define PARQUET_COLUMN_TEST_SPECIALIZATION_H +#pragma once #include #include @@ -179,5 +178,3 @@ void PrimitiveTypedTest::GenerateData(int64_t num_values) { } // namespace test } // namespace parquet - -#endif // PARQUET_COLUMN_TEST_SPECIALIZATION_H diff --git a/cpp/src/parquet/test-util.h b/cpp/src/parquet/test-util.h index 3e74398b054ca..ed7c7bb901621 100644 --- a/cpp/src/parquet/test-util.h +++ b/cpp/src/parquet/test-util.h @@ -19,13 +19,13 @@ // Parquet column chunk within a row group. It could be extended in the future // to iterate through all data pages in all chunks in a file. 
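PlainEncode/PlainDecode in statistics.cc above now go through the MakeTypedEncoder/MakeTypedDecoder factories from encoding.h. A round-trip sketch of that API, assuming (from the defaulted parameter) that a null ColumnDescriptor is acceptable for primitive numeric types:

// PLAIN-encode four int32 values into a buffer, then decode them back.
#include <cassert>
#include <cstdint>

#include "parquet/encoding.h"

void RoundTripInt32() {
  int32_t values[] = {1, 2, 3, 4};
  auto encoder =
      parquet::MakeTypedEncoder<parquet::Int32Type>(parquet::Encoding::PLAIN);
  encoder->Put(values, 4);
  auto buffer = encoder->FlushValues();  // std::shared_ptr<arrow::Buffer>

  auto decoder =
      parquet::MakeTypedDecoder<parquet::Int32Type>(parquet::Encoding::PLAIN);
  decoder->SetData(4, buffer->data(), static_cast<int>(buffer->size()));
  int32_t out[4] = {0};
  int decoded = decoder->Decode(out, 4);
  assert(decoded == 4 && out[3] == 4);
}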
-#ifndef PARQUET_COLUMN_TEST_UTIL_H -#define PARQUET_COLUMN_TEST_UTIL_H +#pragma once #include #include #include #include +#include #include #include @@ -33,7 +33,7 @@ #include "parquet/column_page.h" #include "parquet/column_reader.h" #include "parquet/column_writer.h" -#include "parquet/encoding-internal.h" +#include "parquet/encoding.h" #include "parquet/util/memory.h" #include "parquet/util/test-common.h" @@ -50,6 +50,15 @@ bool operator==(const FixedLenByteArray& a, const FixedLenByteArray& b) { namespace test { +template +std::shared_ptr EncodeValues(Encoding::type encoding, bool use_dictionary, + const Sequence& values, int length, + const ColumnDescriptor* descr) { + auto encoder = MakeTypedEncoder(encoding, use_dictionary, descr); + encoder->Put(values, length); + return encoder->FlushValues(); +} + template static void InitValues(int num_values, vector& values, vector& buffer) { random_numbers(num_values, 0, std::numeric_limits::min(), @@ -133,9 +142,8 @@ class DataPageBuilder { void AppendValues(const ColumnDescriptor* d, const vector& values, Encoding::type encoding = Encoding::PLAIN) { - PlainEncoder encoder(d); - encoder.Put(&values[0], static_cast(values.size())); - std::shared_ptr values_sink = encoder.FlushValues(); + std::shared_ptr values_sink = EncodeValues( + encoding, false, values.data(), static_cast(values.size()), d); sink_->Write(values_sink->data(), values_sink->size()); num_values_ = std::max(static_cast(values.size()), num_values_); @@ -195,9 +203,11 @@ void DataPageBuilder::AppendValues(const ColumnDescriptor* d, if (encoding != Encoding::PLAIN) { ParquetException::NYI("only plain encoding currently implemented"); } - PlainEncoder encoder(d); - encoder.Put(values, static_cast(values.size())); - std::shared_ptr buffer = encoder.FlushValues(); + + auto encoder = MakeTypedEncoder(Encoding::PLAIN, false, d); + dynamic_cast(encoder.get()) + ->Put(values, static_cast(values.size())); + std::shared_ptr buffer = encoder->FlushValues(); sink_->Write(buffer->data(), buffer->size()); num_values_ = std::max(static_cast(values.size()), num_values_); @@ -243,36 +253,39 @@ class DictionaryPageBuilder { public: typedef typename TYPE::c_type TC; static constexpr int TN = TYPE::type_num; + using SpecializedEncoder = typename EncodingTraits::Encoder; // This class writes data and metadata to the passed inputs explicit DictionaryPageBuilder(const ColumnDescriptor* d) : num_dict_values_(0), have_values_(false) { - encoder_.reset(new DictEncoder(d, &pool_)); + auto encoder = MakeTypedEncoder(Encoding::PLAIN, true, d); + dict_traits_ = dynamic_cast*>(encoder.get()); + encoder_.reset(dynamic_cast(encoder.release())); } - ~DictionaryPageBuilder() { pool_.FreeAll(); } + ~DictionaryPageBuilder() {} shared_ptr AppendValues(const vector& values) { int num_values = static_cast(values.size()); // Dictionary encoding encoder_->Put(values.data(), num_values); - num_dict_values_ = encoder_->num_entries(); + num_dict_values_ = dict_traits_->num_entries(); have_values_ = true; return encoder_->FlushValues(); } shared_ptr WriteDict() { - std::shared_ptr dict_buffer = - AllocateBuffer(::arrow::default_memory_pool(), encoder_->dict_encoded_size()); - encoder_->WriteDict(dict_buffer->mutable_data()); + std::shared_ptr dict_buffer = + AllocateBuffer(::arrow::default_memory_pool(), dict_traits_->dict_encoded_size()); + dict_traits_->WriteDict(dict_buffer->mutable_data()); return dict_buffer; } int32_t num_values() const { return num_dict_values_; } private: - ChunkedAllocator pool_; - shared_ptr> 
encoder_;
+  DictEncoder<TYPE>* dict_traits_;
+  std::unique_ptr<SpecializedEncoder> encoder_;
   int32_t num_dict_values_;
   bool have_values_;
 };
@@ -443,5 +456,3 @@ static int MakePages(const ColumnDescriptor* d, int num_pages, int levels_per_pa
 }  // namespace test
 }  // namespace parquet
-
-#endif  // PARQUET_COLUMN_TEST_UTIL_H
diff --git a/cpp/src/parquet/thrift.h b/cpp/src/parquet/thrift.h
index 9c665acfac4ff..1afd9bf436550 100644
--- a/cpp/src/parquet/thrift.h
+++ b/cpp/src/parquet/thrift.h
@@ -15,8 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#ifndef PARQUET_THRIFT_UTIL_H
-#define PARQUET_THRIFT_UTIL_H
+#pragma once
 
 #include "arrow/util/windows_compatibility.h"
 
@@ -28,6 +27,7 @@
 #else
 #include
 #endif
+#include
 
 // TCompactProtocol requires some #defines to work right.
 #define SIGNED_RIGHT_SHIFT_IS 1
@@ -105,18 +105,18 @@ static inline format::CompressionCodec::type ToThrift(Compression::type type) {
 // ----------------------------------------------------------------------
 // Thrift struct serialization / deserialization utilities
 
+using ThriftBuffer = apache::thrift::transport::TMemoryBuffer;
+
 // Deserialize a thrift message from buf/len. buf/len must at least contain
 // all the bytes needed to store the thrift message. On return, len will be
 // set to the actual length of the header.
 template <class T>
 inline void DeserializeThriftMsg(const uint8_t* buf, uint32_t* len, T* deserialized_msg) {
   // Deserialize msg bytes into c++ thrift msg using memory transport.
-  shared_ptr<apache::thrift::transport::TMemoryBuffer> tmem_transport(
-      new apache::thrift::transport::TMemoryBuffer(const_cast<uint8_t*>(buf), *len));
-  apache::thrift::protocol::TCompactProtocolFactoryT<
-      apache::thrift::transport::TMemoryBuffer>
-      tproto_factory;
-  shared_ptr<apache::thrift::protocol::TProtocol> tproto =
+  shared_ptr<ThriftBuffer> tmem_transport(
+      new ThriftBuffer(const_cast<uint8_t*>(buf), *len));
+  apache::thrift::protocol::TCompactProtocolFactoryT<ThriftBuffer> tproto_factory;
+  shared_ptr<apache::thrift::protocol::TProtocol> tproto =  //
       tproto_factory.getProtocol(tmem_transport);
   try {
     deserialized_msg->read(tproto.get());
@@ -129,34 +129,57 @@ inline void DeserializeThriftMsg(const uint8_t* buf, uint32_t* len, T* deseriali
   *len = *len - bytes_left;
 }
 
-// Serialize obj into a buffer. The result is returned as a string.
-// The arguments are the object to be serialized and
-// the expected size of the serialized object
-template <class T>
-inline int64_t SerializeThriftMsg(T* obj, uint32_t len, OutputStream* out) {
-  shared_ptr<apache::thrift::transport::TMemoryBuffer> mem_buffer(
-      new apache::thrift::transport::TMemoryBuffer(len));
-  apache::thrift::protocol::TCompactProtocolFactoryT<
-      apache::thrift::transport::TMemoryBuffer>
-      tproto_factory;
-  shared_ptr<apache::thrift::protocol::TProtocol> tproto =
-      tproto_factory.getProtocol(mem_buffer);
-  try {
-    mem_buffer->resetBuffer();
-    obj->write(tproto.get());
-  } catch (std::exception& e) {
-    std::stringstream ss;
-    ss << "Couldn't serialize thrift: " << e.what() << "\n";
-    throw ParquetException(ss.str());
+/// Utility class to serialize thrift objects to a binary format. This object
+/// should be reused if possible to reuse the underlying memory.
+/// Note: thrift will encode NULLs into the serialized buffer so it is not valid
+/// to treat it as a string.
+class ThriftSerializer {
+ public:
+  explicit ThriftSerializer(int initial_buffer_size = 1024)
+      : mem_buffer_(new ThriftBuffer(initial_buffer_size)) {
+    apache::thrift::protocol::TCompactProtocolFactoryT<ThriftBuffer> factory;
+    protocol_ = factory.getProtocol(mem_buffer_);
   }
 
-  uint8_t* out_buffer;
-  uint32_t out_length;
-  mem_buffer->getBuffer(&out_buffer, &out_length);
-  out->Write(out_buffer, out_length);
-  return out_length;
-}
+  /// Serialize obj into a memory buffer. The result is returned in buffer/len. The
+  /// memory returned is owned by this object and will be invalid when another object
+  /// is serialized.
+  template <class T>
+  void SerializeToBuffer(const T* obj, uint32_t* len, uint8_t** buffer) {
+    SerializeObject(obj);
+    mem_buffer_->getBuffer(buffer, len);
+  }
 
-}  // namespace parquet
+  template <class T>
+  void SerializeToString(const T* obj, std::string* result) {
+    SerializeObject(obj);
+    *result = mem_buffer_->getBufferAsString();
+  }
+
+  template <class T>
+  int64_t Serialize(const T* obj, OutputStream* out) {
+    uint8_t* out_buffer;
+    uint32_t out_length;
+    SerializeToBuffer(obj, &out_length, &out_buffer);
+    out->Write(out_buffer, out_length);
+    return static_cast<int64_t>(out_length);
+  }
 
-#endif  // PARQUET_THRIFT_UTIL_H
+ private:
+  template <class T>
+  void SerializeObject(const T* obj) {
+    try {
+      mem_buffer_->resetBuffer();
+      obj->write(protocol_.get());
+    } catch (std::exception& e) {
+      std::stringstream ss;
+      ss << "Couldn't serialize thrift: " << e.what() << "\n";
+      throw ParquetException(ss.str());
+    }
+  }
+
+  shared_ptr<ThriftBuffer> mem_buffer_;
+  shared_ptr<apache::thrift::protocol::TProtocol> protocol_;
+};
+
+}  // namespace parquet
diff --git a/cpp/src/parquet/types.h b/cpp/src/parquet/types.h
index b27718027b0da..2bc51e7dc7902 100644
--- a/cpp/src/parquet/types.h
+++ b/cpp/src/parquet/types.h
@@ -160,7 +160,8 @@ struct ByteArray {
 };
 
 inline bool operator==(const ByteArray& left, const ByteArray& right) {
-  return left.len == right.len && 0 == std::memcmp(left.ptr, right.ptr, left.len);
+  return left.len == right.len &&
+         (left.len == 0 || std::memcmp(left.ptr, right.ptr, left.len) == 0);
 }
 
 inline bool operator!=(const ByteArray& left, const ByteArray& right) {
@@ -175,6 +176,19 @@ struct FixedLenByteArray {
 
 using FLBA = FixedLenByteArray;
 
+// Julian day at unix epoch.
+//
+// The Julian Day Number (JDN) is the integer assigned to a whole solar day in
+// the Julian day count starting from noon Universal time, with Julian day
+// number 0 assigned to the day starting at noon on Monday, January 1, 4713 BC,
+// proleptic Julian calendar (November 24, 4714 BC, in the proleptic Gregorian
+// calendar).
+constexpr int64_t kJulianToUnixEpochDays = INT64_C(2440588);
+constexpr int64_t kSecondsPerDay = INT64_C(60 * 60 * 24);
+constexpr int64_t kMillisecondsPerDay = kSecondsPerDay * INT64_C(1000);
+constexpr int64_t kMicrosecondsPerDay = kMillisecondsPerDay * INT64_C(1000);
+constexpr int64_t kNanosecondsPerDay = kMicrosecondsPerDay * INT64_C(1000);
+
 MANUALLY_ALIGNED_STRUCT(1) Int96 { uint32_t value[3]; };
 STRUCT_END(Int96, 12);
 
@@ -192,6 +206,14 @@ static inline void Int96SetNanoSeconds(parquet::Int96& i96, int64_t nanoseconds)
   std::memcpy(&i96.value, &nanoseconds, sizeof(nanoseconds));
 }
 
+static inline int64_t Int96GetNanoSeconds(const parquet::Int96& i96) {
+  int64_t days_since_epoch = i96.value[2] - kJulianToUnixEpochDays;
+  int64_t nanoseconds = 0;
+
+  memcpy(&nanoseconds, &i96.value, sizeof(int64_t));
+  return days_since_epoch * kNanosecondsPerDay + nanoseconds;
+}
+
 static inline std::string Int96ToString(const Int96& a) {
   std::ostringstream result;
   std::copy(a.value, a.value + 3, std::ostream_iterator<uint32_t>(result, " "));
diff --git a/cpp/src/parquet/util/CMakeLists.txt b/cpp/src/parquet/util/CMakeLists.txt
index 72d4ca28f9b83..b5718b1601ee0 100644
--- a/cpp/src/parquet/util/CMakeLists.txt
+++ b/cpp/src/parquet/util/CMakeLists.txt
@@ -16,12 +16,7 @@
 # under the License.
 
 # Headers: util
-install(FILES
-  comparison.h
-  macros.h
-  memory.h
-  visibility.h
-  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/parquet/util")
+ARROW_INSTALL_ALL_HEADERS("parquet/util")
 
 ADD_PARQUET_TEST(comparison-test)
 ADD_PARQUET_TEST(memory-test)
diff --git a/cpp/src/parquet/util/memory-test.cc b/cpp/src/parquet/util/memory-test.cc
index bfd685db00d2a..cdeb8eef8110c 100644
--- a/cpp/src/parquet/util/memory-test.cc
+++ b/cpp/src/parquet/util/memory-test.cc
@@ -34,222 +34,6 @@ namespace parquet {
 
 class TestBuffer : public ::testing::Test {};
 
-// Utility class to call private functions on MemPool.
-class ChunkedAllocatorTest {
- public:
-  static bool CheckIntegrity(ChunkedAllocator* pool, bool current_chunk_empty) {
-    return pool->CheckIntegrity(current_chunk_empty);
-  }
-
-  static const int INITIAL_CHUNK_SIZE = ChunkedAllocator::INITIAL_CHUNK_SIZE;
-  static const int MAX_CHUNK_SIZE = ChunkedAllocator::MAX_CHUNK_SIZE;
-};
-
-const int ChunkedAllocatorTest::INITIAL_CHUNK_SIZE;
-const int ChunkedAllocatorTest::MAX_CHUNK_SIZE;
-
-TEST(ChunkedAllocatorTest, Basic) {
-  ChunkedAllocator p;
-  ChunkedAllocator p2;
-  ChunkedAllocator p3;
-
-  for (int iter = 0; iter < 2; ++iter) {
-    // allocate a total of 24K in 32-byte pieces (for which we only request 25 bytes)
-    for (int i = 0; i < 768; ++i) {
-      // pads to 32 bytes
-      p.Allocate(25);
-    }
-    // we handed back 24K
-    EXPECT_EQ(24 * 1024, p.total_allocated_bytes());
-    // ..
and allocated 28K of chunks (4, 8, 16) - EXPECT_EQ(28 * 1024, p.GetTotalChunkSizes()); - - // we're passing on the first two chunks, containing 12K of data; we're left with - // one chunk of 16K containing 12K of data - p2.AcquireData(&p, true); - EXPECT_EQ(12 * 1024, p.total_allocated_bytes()); - EXPECT_EQ(16 * 1024, p.GetTotalChunkSizes()); - - // we allocate 8K, for which there isn't enough room in the current chunk, - // so another one is allocated (32K) - p.Allocate(8 * 1024); - EXPECT_EQ((16 + 32) * 1024, p.GetTotalChunkSizes()); - - // we allocate 65K, which doesn't fit into the current chunk or the default - // size of the next allocated chunk (64K) - p.Allocate(65 * 1024); - EXPECT_EQ((12 + 8 + 65) * 1024, p.total_allocated_bytes()); - if (iter == 0) { - EXPECT_EQ((12 + 8 + 65) * 1024, p.peak_allocated_bytes()); - } else { - EXPECT_EQ((1 + 120 + 33) * 1024, p.peak_allocated_bytes()); - } - EXPECT_EQ((16 + 32 + 65) * 1024, p.GetTotalChunkSizes()); - - // Clear() resets allocated data, but doesn't remove any chunks - p.Clear(); - EXPECT_EQ(0, p.total_allocated_bytes()); - if (iter == 0) { - EXPECT_EQ((12 + 8 + 65) * 1024, p.peak_allocated_bytes()); - } else { - EXPECT_EQ((1 + 120 + 33) * 1024, p.peak_allocated_bytes()); - } - EXPECT_EQ((16 + 32 + 65) * 1024, p.GetTotalChunkSizes()); - - // next allocation reuses existing chunks - p.Allocate(1024); - EXPECT_EQ(1024, p.total_allocated_bytes()); - if (iter == 0) { - EXPECT_EQ((12 + 8 + 65) * 1024, p.peak_allocated_bytes()); - } else { - EXPECT_EQ((1 + 120 + 33) * 1024, p.peak_allocated_bytes()); - } - EXPECT_EQ((16 + 32 + 65) * 1024, p.GetTotalChunkSizes()); - - // ... unless it doesn't fit into any available chunk - p.Allocate(120 * 1024); - EXPECT_EQ((1 + 120) * 1024, p.total_allocated_bytes()); - if (iter == 0) { - EXPECT_EQ((1 + 120) * 1024, p.peak_allocated_bytes()); - } else { - EXPECT_EQ((1 + 120 + 33) * 1024, p.peak_allocated_bytes()); - } - EXPECT_EQ((130 + 16 + 32 + 65) * 1024, p.GetTotalChunkSizes()); - - // ... Try another chunk that fits into an existing chunk - p.Allocate(33 * 1024); - EXPECT_EQ((1 + 120 + 33) * 1024, p.total_allocated_bytes()); - EXPECT_EQ((130 + 16 + 32 + 65) * 1024, p.GetTotalChunkSizes()); - - // we're releasing 3 chunks, which get added to p2 - p2.AcquireData(&p, false); - EXPECT_EQ(0, p.total_allocated_bytes()); - EXPECT_EQ((1 + 120 + 33) * 1024, p.peak_allocated_bytes()); - EXPECT_EQ(0, p.GetTotalChunkSizes()); - - p3.AcquireData(&p2, true); // we're keeping the 65k chunk - EXPECT_EQ(33 * 1024, p2.total_allocated_bytes()); - EXPECT_EQ(65 * 1024, p2.GetTotalChunkSizes()); - - p.FreeAll(); - p2.FreeAll(); - p3.FreeAll(); - } -} - -// Test that we can keep an allocated chunk and a free chunk. -// This case verifies that when chunks are acquired by another memory pool the -// remaining chunks are consistent if there were more than one used chunk and some -// free chunks. 
-TEST(ChunkedAllocatorTest, Keep) { - ChunkedAllocator p; - p.Allocate(4 * 1024); - p.Allocate(8 * 1024); - p.Allocate(16 * 1024); - EXPECT_EQ((4 + 8 + 16) * 1024, p.total_allocated_bytes()); - EXPECT_EQ((4 + 8 + 16) * 1024, p.GetTotalChunkSizes()); - p.Clear(); - EXPECT_EQ(0, p.total_allocated_bytes()); - EXPECT_EQ((4 + 8 + 16) * 1024, p.GetTotalChunkSizes()); - p.Allocate(1 * 1024); - p.Allocate(4 * 1024); - EXPECT_EQ((1 + 4) * 1024, p.total_allocated_bytes()); - EXPECT_EQ((4 + 8 + 16) * 1024, p.GetTotalChunkSizes()); - - ChunkedAllocator p2; - p2.AcquireData(&p, true); - EXPECT_EQ(4 * 1024, p.total_allocated_bytes()); - EXPECT_EQ((8 + 16) * 1024, p.GetTotalChunkSizes()); - EXPECT_EQ(1 * 1024, p2.total_allocated_bytes()); - EXPECT_EQ(4 * 1024, p2.GetTotalChunkSizes()); - - p.FreeAll(); - p2.FreeAll(); -} - -// Tests that we can return partial allocations. -TEST(ChunkedAllocatorTest, ReturnPartial) { - ChunkedAllocator p; - uint8_t* ptr = p.Allocate(1024); - EXPECT_EQ(1024, p.total_allocated_bytes()); - memset(ptr, 0, 1024); - p.ReturnPartialAllocation(1024); - - uint8_t* ptr2 = p.Allocate(1024); - EXPECT_EQ(1024, p.total_allocated_bytes()); - EXPECT_TRUE(ptr == ptr2); - p.ReturnPartialAllocation(1016); - - ptr2 = p.Allocate(1016); - EXPECT_EQ(1024, p.total_allocated_bytes()); - EXPECT_TRUE(ptr2 == ptr + 8); - p.ReturnPartialAllocation(512); - memset(ptr2, 1, 1016 - 512); - - uint8_t* ptr3 = p.Allocate(512); - EXPECT_EQ(1024, p.total_allocated_bytes()); - EXPECT_TRUE(ptr3 == ptr + 512); - memset(ptr3, 2, 512); - - for (int i = 0; i < 8; ++i) { - EXPECT_EQ(0, ptr[i]); - } - for (int i = 8; i < 512; ++i) { - EXPECT_EQ(1, ptr[i]); - } - for (int i = 512; i < 1024; ++i) { - EXPECT_EQ(2, ptr[i]); - } - - p.FreeAll(); -} - -// Test that the ChunkedAllocator overhead is bounded when we make allocations of -// INITIAL_CHUNK_SIZE. -TEST(ChunkedAllocatorTest, MemoryOverhead) { - ChunkedAllocator p; - const int alloc_size = ChunkedAllocatorTest::INITIAL_CHUNK_SIZE; - const int num_allocs = 1000; - int64_t total_allocated = 0; - - for (int i = 0; i < num_allocs; ++i) { - uint8_t* mem = p.Allocate(alloc_size); - ASSERT_TRUE(mem != nullptr); - total_allocated += alloc_size; - - int64_t wasted_memory = p.GetTotalChunkSizes() - total_allocated; - // The initial chunk fits evenly into MAX_CHUNK_SIZE, so should have at most - // one empty chunk at the end. - EXPECT_LE(wasted_memory, ChunkedAllocatorTest::MAX_CHUNK_SIZE); - // The chunk doubling algorithm should not allocate chunks larger than the total - // amount of memory already allocated. - EXPECT_LE(wasted_memory, total_allocated); - } - - p.FreeAll(); -} - -// Test that the ChunkedAllocator overhead is bounded when we make alternating -// large and small allocations. -TEST(ChunkedAllocatorTest, FragmentationOverhead) { - ChunkedAllocator p; - const int num_allocs = 100; - int64_t total_allocated = 0; - - for (int i = 0; i < num_allocs; ++i) { - int alloc_size = i % 2 == 0 ? 1 : ChunkedAllocatorTest::MAX_CHUNK_SIZE; - uint8_t* mem = p.Allocate(alloc_size); - ASSERT_TRUE(mem != nullptr); - total_allocated += alloc_size; - - int64_t wasted_memory = p.GetTotalChunkSizes() - total_allocated; - // Fragmentation should not waste more than half of each completed chunk. 
-    EXPECT_LE(wasted_memory, total_allocated + ChunkedAllocatorTest::MAX_CHUNK_SIZE);
-  }
-
-  p.FreeAll();
-}
-
 TEST(TestBufferedInputStream, Basics) {
   int64_t source_size = 256;
   int64_t stream_offset = 10;
@@ -315,9 +99,8 @@ TEST(TestBufferedInputStream, Basics) {
 
 TEST(TestArrowInputFile, ReadAt) {
   std::string data = "this is the data";
-  auto data_buffer = reinterpret_cast<const uint8_t*>(data.c_str());
 
-  auto file = std::make_shared<::arrow::io::BufferReader>(data_buffer, data.size());
+  auto file = std::make_shared<::arrow::io::BufferReader>(data);
   auto source = std::make_shared<ArrowInputFile>(file);
 
   ASSERT_EQ(0, source->Tell());
@@ -335,7 +118,7 @@ TEST(TestArrowInputFile, Read) {
   std::string data = "this is the data";
   auto data_buffer = reinterpret_cast<const uint8_t*>(data.c_str());
 
-  auto file = std::make_shared<::arrow::io::BufferReader>(data_buffer, data.size());
+  auto file = std::make_shared<::arrow::io::BufferReader>(data);
   auto source = std::make_shared<ArrowInputFile>(file);
 
   ASSERT_EQ(0, source->Tell());
diff --git a/cpp/src/parquet/util/memory.cc b/cpp/src/parquet/util/memory.cc
index fde424aafe71d..b3f83bdfdfd32 100644
--- a/cpp/src/parquet/util/memory.cc
+++ b/cpp/src/parquet/util/memory.cc
@@ -115,238 +115,6 @@ template class Vector;
 template class Vector;
 template class Vector;
 
-const int ChunkedAllocator::INITIAL_CHUNK_SIZE;
-const int ChunkedAllocator::MAX_CHUNK_SIZE;
-
-ChunkedAllocator::ChunkedAllocator(MemoryPool* pool)
-    : current_chunk_idx_(-1),
-      next_chunk_size_(INITIAL_CHUNK_SIZE),
-      total_allocated_bytes_(0),
-      peak_allocated_bytes_(0),
-      total_reserved_bytes_(0),
-      pool_(pool) {}
-
-ChunkedAllocator::ChunkInfo::ChunkInfo(int64_t size, uint8_t* buf)
-    : data(buf), size(size), allocated_bytes(0) {}
-
-ChunkedAllocator::~ChunkedAllocator() {
-  int64_t total_bytes_released = 0;
-  for (size_t i = 0; i < chunks_.size(); ++i) {
-    total_bytes_released += chunks_[i].size;
-    pool_->Free(chunks_[i].data, chunks_[i].size);
-  }
-
-  DCHECK(chunks_.empty()) << "Must call FreeAll() or AcquireData() for this pool";
-}
-
-void ChunkedAllocator::ReturnPartialAllocation(int byte_size) {
-  DCHECK_GE(byte_size, 0);
-  DCHECK_NE(current_chunk_idx_, -1);
-  ChunkInfo& info = chunks_[current_chunk_idx_];
-  DCHECK_GE(info.allocated_bytes, byte_size);
-  info.allocated_bytes -= byte_size;
-  total_allocated_bytes_ -= byte_size;
-}
-
-template <bool CHECK_LIMIT_FIRST>
-uint8_t* ChunkedAllocator::Allocate(int size) {
-  if (size == 0) {
-    return nullptr;
-  }
-
-  int64_t num_bytes = ::arrow::BitUtil::RoundUp(size, 8);
-  if (current_chunk_idx_ == -1 ||
-      num_bytes + chunks_[current_chunk_idx_].allocated_bytes >
-          chunks_[current_chunk_idx_].size) {
-    // If we couldn't allocate a new chunk, return nullptr.
-    if (ARROW_PREDICT_FALSE(!FindChunk(num_bytes))) {
-      return nullptr;
-    }
-  }
-  ChunkInfo& info = chunks_[current_chunk_idx_];
-  uint8_t* result = info.data + info.allocated_bytes;
-  DCHECK_LE(info.allocated_bytes + num_bytes, info.size);
-  info.allocated_bytes += num_bytes;
-  total_allocated_bytes_ += num_bytes;
-  DCHECK_LE(current_chunk_idx_, static_cast<int>(chunks_.size()) - 1);
-  peak_allocated_bytes_ = std::max(total_allocated_bytes_, peak_allocated_bytes_);
-  return result;
-}
-
-uint8_t* ChunkedAllocator::Allocate(int size) { return Allocate<false>(size); }
-
-void ChunkedAllocator::Clear() {
-  current_chunk_idx_ = -1;
-  for (auto chunk = chunks_.begin(); chunk != chunks_.end(); ++chunk) {
-    chunk->allocated_bytes = 0;
-  }
-  total_allocated_bytes_ = 0;
-  DCHECK(CheckIntegrity(false));
-}
-
-void ChunkedAllocator::FreeAll() {
-  int64_t total_bytes_released = 0;
-  for (size_t i = 0; i < chunks_.size(); ++i) {
-    total_bytes_released += chunks_[i].size;
-    pool_->Free(chunks_[i].data, chunks_[i].size);
-  }
-  chunks_.clear();
-  next_chunk_size_ = INITIAL_CHUNK_SIZE;
-  current_chunk_idx_ = -1;
-  total_allocated_bytes_ = 0;
-  total_reserved_bytes_ = 0;
-}
-
-bool ChunkedAllocator::FindChunk(int64_t min_size) {
-  // Try to allocate from a free chunk. The first free chunk, if any, will be immediately
-  // after the current chunk.
-  int first_free_idx = current_chunk_idx_ + 1;
-  // (cast size() to signed int in order to avoid everything else being cast to
-  // unsigned long, in particular -1)
-  while (++current_chunk_idx_ < static_cast<int>(chunks_.size())) {
-    // we found a free chunk
-    DCHECK_EQ(chunks_[current_chunk_idx_].allocated_bytes, 0);
-
-    if (chunks_[current_chunk_idx_].size >= min_size) {
-      // This chunk is big enough. Move it before the other free chunks.
-      if (current_chunk_idx_ != first_free_idx) {
-        std::swap(chunks_[current_chunk_idx_], chunks_[first_free_idx]);
-        current_chunk_idx_ = first_free_idx;
-      }
-      break;
-    }
-  }
-
-  if (current_chunk_idx_ == static_cast<int>(chunks_.size())) {
-    // need to allocate new chunk.
-    int64_t chunk_size;
-    DCHECK_GE(next_chunk_size_, INITIAL_CHUNK_SIZE);
-    DCHECK_LE(next_chunk_size_, MAX_CHUNK_SIZE);
-
-    chunk_size = std::max(min_size, next_chunk_size_);
-
-    // Allocate a new chunk. Return early if malloc fails.
-    uint8_t* buf = nullptr;
-    PARQUET_THROW_NOT_OK(pool_->Allocate(chunk_size, &buf));
-    if (ARROW_PREDICT_FALSE(buf == nullptr)) {
-      DCHECK_EQ(current_chunk_idx_, static_cast<int>(chunks_.size()));
-      current_chunk_idx_ = static_cast<int>(chunks_.size()) - 1;
-      return false;
-    }
-
-    // If there are no free chunks put it at the end, otherwise before the first free.
-    if (first_free_idx == static_cast<int>(chunks_.size())) {
-      chunks_.push_back(ChunkInfo(chunk_size, buf));
-    } else {
-      current_chunk_idx_ = first_free_idx;
-      auto insert_chunk = chunks_.begin() + current_chunk_idx_;
-      chunks_.insert(insert_chunk, ChunkInfo(chunk_size, buf));
-    }
-    total_reserved_bytes_ += chunk_size;
-    // Don't increment the chunk size until the allocation succeeds: if an attempted
-    // large allocation fails we don't want to increase the chunk size further.
-    next_chunk_size_ =
-        static_cast<int64_t>(std::min<int64_t>(chunk_size * 2, MAX_CHUNK_SIZE));
-  }
-
-  DCHECK_LT(current_chunk_idx_, static_cast<int>(chunks_.size()));
-  DCHECK(CheckIntegrity(true));
-  return true;
-}
-
-void ChunkedAllocator::AcquireData(ChunkedAllocator* src, bool keep_current) {
-  DCHECK(src->CheckIntegrity(false));
-  int num_acquired_chunks;
-  if (keep_current) {
-    num_acquired_chunks = src->current_chunk_idx_;
-  } else if (src->GetFreeOffset() == 0) {
-    // nothing in the last chunk
-    num_acquired_chunks = src->current_chunk_idx_;
-  } else {
-    num_acquired_chunks = src->current_chunk_idx_ + 1;
-  }
-
-  if (num_acquired_chunks <= 0) {
-    if (!keep_current) src->FreeAll();
-    return;
-  }
-
-  auto end_chunk = src->chunks_.begin() + num_acquired_chunks;
-  int64_t total_transfered_bytes = 0;
-  for (auto i = src->chunks_.begin(); i != end_chunk; ++i) {
-    total_transfered_bytes += i->size;
-  }
-  src->total_reserved_bytes_ -= total_transfered_bytes;
-  total_reserved_bytes_ += total_transfered_bytes;
-
-  // insert new chunks after current_chunk_idx_
-  auto insert_chunk = chunks_.begin() + (current_chunk_idx_ + 1);
-  chunks_.insert(insert_chunk, src->chunks_.begin(), end_chunk);
-  src->chunks_.erase(src->chunks_.begin(), end_chunk);
-  current_chunk_idx_ += num_acquired_chunks;
-
-  if (keep_current) {
-    src->current_chunk_idx_ = 0;
-    DCHECK(src->chunks_.size() == 1 || src->chunks_[1].allocated_bytes == 0);
-    total_allocated_bytes_ += src->total_allocated_bytes_ - src->GetFreeOffset();
-    src->total_allocated_bytes_ = src->GetFreeOffset();
-  } else {
-    src->current_chunk_idx_ = -1;
-    total_allocated_bytes_ += src->total_allocated_bytes_;
-    src->total_allocated_bytes_ = 0;
-  }
-  peak_allocated_bytes_ = std::max(total_allocated_bytes_, peak_allocated_bytes_);
-
-  if (!keep_current) src->FreeAll();
-  DCHECK(CheckIntegrity(false));
-}
-
-std::string ChunkedAllocator::DebugString() {
-  std::stringstream out;
-  char str[16];
-  out << "ChunkedAllocator(#chunks=" << chunks_.size() << " [";
-  for (size_t i = 0; i < chunks_.size(); ++i) {
-    sprintf(str, "0x%zx=", reinterpret_cast<size_t>(chunks_[i].data));  // NOLINT
-    out << (i > 0 ? " " : "") << str << chunks_[i].size << "/"
-        << chunks_[i].allocated_bytes;
-  }
-  out << "] current_chunk=" << current_chunk_idx_
-      << " total_sizes=" << GetTotalChunkSizes()
-      << " total_alloc=" << total_allocated_bytes_ << ")";
-  return out.str();
-}
-
-int64_t ChunkedAllocator::GetTotalChunkSizes() const {
-  int64_t result = 0;
-  for (size_t i = 0; i < chunks_.size(); ++i) {
-    result += chunks_[i].size;
-  }
-  return result;
-}
-
-bool ChunkedAllocator::CheckIntegrity(bool current_chunk_empty) {
-  // check that current_chunk_idx_ points to the last chunk with allocated data
-  DCHECK_LT(current_chunk_idx_, static_cast<int>(chunks_.size()));
-  int64_t total_allocated = 0;
-  for (int i = 0; i < static_cast<int>(chunks_.size()); ++i) {
-    DCHECK_GT(chunks_[i].size, 0);
-    if (i < current_chunk_idx_) {
-      DCHECK_GT(chunks_[i].allocated_bytes, 0);
-    } else if (i == current_chunk_idx_) {
-      if (current_chunk_empty) {
-        DCHECK_EQ(chunks_[i].allocated_bytes, 0);
-      } else {
-        DCHECK_GT(chunks_[i].allocated_bytes, 0);
-      }
-    } else {
-      DCHECK_EQ(chunks_[i].allocated_bytes, 0);
-    }
-    total_allocated += chunks_[i].allocated_bytes;
-  }
-  DCHECK_EQ(total_allocated, total_allocated_bytes_);
-  return true;
-}
-
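The chunk-growth rule in the removed FindChunk() above is easiest to see in isolation. A minimal, self-contained sketch of the doubling policy, with the 4K/1M constants inlined from INITIAL_CHUNK_SIZE and MAX_CHUNK_SIZE (the helper name is made up for the demo; this is not the removed class itself):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Each new chunk doubles the previous size, capped at MAX_CHUNK_SIZE, and a
// request larger than the next planned size gets a chunk of exactly its own
// (already rounded) size.
int64_t NextChunkSize(int64_t min_size, int64_t* next_chunk_size) {
  const int64_t kMaxChunkSize = 1024 * 1024;
  int64_t chunk_size = std::max(min_size, *next_chunk_size);
  *next_chunk_size = std::min(chunk_size * 2, kMaxChunkSize);
  return chunk_size;
}

int main() {
  int64_t next = 4 * 1024;                               // INITIAL_CHUNK_SIZE
  assert(NextChunkSize(100, &next) == 4 * 1024);         // small request: 4K chunk
  assert(NextChunkSize(100, &next) == 8 * 1024);         // sizes double per chunk
  assert(NextChunkSize(65 * 1024, &next) == 65 * 1024);  // oversized request: own chunk
  return 0;
}

The last assertion matches the test above that allocates 65K and expects a dedicated 65K chunk rather than the next doubled size.
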
" " : "") << str << chunks_[i].size << "/" - << chunks_[i].allocated_bytes; - } - out << "] current_chunk=" << current_chunk_idx_ - << " total_sizes=" << GetTotalChunkSizes() - << " total_alloc=" << total_allocated_bytes_ << ")"; - return out.str(); -} - -int64_t ChunkedAllocator::GetTotalChunkSizes() const { - int64_t result = 0; - for (size_t i = 0; i < chunks_.size(); ++i) { - result += chunks_[i].size; - } - return result; -} - -bool ChunkedAllocator::CheckIntegrity(bool current_chunk_empty) { - // check that current_chunk_idx_ points to the last chunk with allocated data - DCHECK_LT(current_chunk_idx_, static_cast(chunks_.size())); - int64_t total_allocated = 0; - for (int i = 0; i < static_cast(chunks_.size()); ++i) { - DCHECK_GT(chunks_[i].size, 0); - if (i < current_chunk_idx_) { - DCHECK_GT(chunks_[i].allocated_bytes, 0); - } else if (i == current_chunk_idx_) { - if (current_chunk_empty) { - DCHECK_EQ(chunks_[i].allocated_bytes, 0); - } else { - DCHECK_GT(chunks_[i].allocated_bytes, 0); - } - } else { - DCHECK_EQ(chunks_[i].allocated_bytes, 0); - } - total_allocated += chunks_[i].allocated_bytes; - } - DCHECK_EQ(total_allocated, total_allocated_bytes_); - return true; -} - // ---------------------------------------------------------------------- // Arrow IO wrappers @@ -465,8 +233,11 @@ void InMemoryOutputStream::Write(const uint8_t* data, int64_t length) { PARQUET_THROW_NOT_OK(buffer_->Resize(new_capacity)); capacity_ = new_capacity; } - memcpy(Head(), data, length); - size_ += length; + // If length == 0, data may be null + if (length > 0) { + memcpy(Head(), data, length); + size_ += length; + } } int64_t InMemoryOutputStream::Tell() { return size_; } diff --git a/cpp/src/parquet/util/memory.h b/cpp/src/parquet/util/memory.h index cccafe8cb38bb..d63ed84dd7ead 100644 --- a/cpp/src/parquet/util/memory.h +++ b/cpp/src/parquet/util/memory.h @@ -66,6 +66,7 @@ class PARQUET_EXPORT Vector { void Swap(Vector& v); inline T& operator[](int64_t i) const { return data_[i]; } + T* data() { return data_; } const T* data() const { return data_; } private: @@ -77,149 +78,6 @@ class PARQUET_EXPORT Vector { PARQUET_DISALLOW_COPY_AND_ASSIGN(Vector); }; -/// A ChunkedAllocator maintains a list of memory chunks from which it -/// allocates memory in response to Allocate() calls; Chunks stay around for -/// the lifetime of the allocator or until they are passed on to another -/// allocator. -// -/// An Allocate() call will attempt to allocate memory from the chunk that was most -/// recently added; if that chunk doesn't have enough memory to -/// satisfy the allocation request, the free chunks are searched for one that is -/// big enough otherwise a new chunk is added to the list. -/// The current_chunk_idx_ always points to the last chunk with allocated memory. -/// In order to keep allocation overhead low, chunk sizes double with each new one -/// added, until they hit a maximum size. -// -/// Example: -/// ChunkedAllocator* p = new ChunkedAllocator(); -/// for (int i = 0; i < 1024; ++i) { -/// returns 8-byte aligned memory (effectively 24 bytes): -/// .. = p->Allocate(17); -/// } -/// at this point, 17K have been handed out in response to Allocate() calls and -/// 28K of chunks have been allocated (chunk sizes: 4K, 8K, 16K) -/// We track total and peak allocated bytes. At this point they would be the same: -/// 28k bytes. A call to Clear will return the allocated memory so -/// total_allocate_bytes_ -/// becomes 0 while peak_allocate_bytes_ remains at 28k. 
diff --git a/cpp/src/parquet/util/memory.h b/cpp/src/parquet/util/memory.h
index cccafe8cb38bb..d63ed84dd7ead 100644
--- a/cpp/src/parquet/util/memory.h
+++ b/cpp/src/parquet/util/memory.h
@@ -66,6 +66,7 @@ class PARQUET_EXPORT Vector {
   void Swap(Vector& v);
 
   inline T& operator[](int64_t i) const { return data_[i]; }
 
+  T* data() { return data_; }
   const T* data() const { return data_; }
 
  private:
@@ -77,149 +78,6 @@ class PARQUET_EXPORT Vector {
   PARQUET_DISALLOW_COPY_AND_ASSIGN(Vector);
 };
 
-/// A ChunkedAllocator maintains a list of memory chunks from which it
-/// allocates memory in response to Allocate() calls; chunks stay around for
-/// the lifetime of the allocator or until they are passed on to another
-/// allocator.
-//
-/// An Allocate() call will attempt to allocate memory from the chunk that was most
-/// recently added; if that chunk doesn't have enough memory to
-/// satisfy the allocation request, the free chunks are searched for one that is
-/// big enough; otherwise a new chunk is added to the list.
-/// The current_chunk_idx_ always points to the last chunk with allocated memory.
-/// In order to keep allocation overhead low, chunk sizes double with each new one
-/// added, until they hit a maximum size.
-//
-/// Example:
-///   ChunkedAllocator* p = new ChunkedAllocator();
-///   for (int i = 0; i < 1024; ++i) {
-///     returns 8-byte aligned memory (effectively 24 bytes):
-///     .. = p->Allocate(17);
-///   }
-///   at this point, 17K have been handed out in response to Allocate() calls and
-///   28K of chunks have been allocated (chunk sizes: 4K, 8K, 16K)
-///   We track total and peak allocated bytes. At this point they would be the same:
-///   24K bytes (1024 allocations of 24 aligned bytes each). A call to Clear will
-///   return the allocated memory so total_allocated_bytes_ becomes 0 while
-///   peak_allocated_bytes_ remains at 24K.
-///   p->Clear();
-///   the entire 1st chunk is returned:
-///   .. = p->Allocate(4 * 1024);
-///   4K of the 2nd chunk are returned:
-///   .. = p->Allocate(4 * 1024);
-///   a new 20K chunk is created
-///   .. = p->Allocate(20 * 1024);
-//
-///   ChunkedAllocator* p2 = new ChunkedAllocator();
-///   the new ChunkedAllocator receives all chunks containing data from p
-///   p2->AcquireData(p, false);
-///   At this point p.total_allocated_bytes_ would be 0 while p.peak_allocated_bytes_
-///   remains unchanged.
-///   The one remaining (empty) chunk is released:
-///   delete p;
-
-class PARQUET_EXPORT ChunkedAllocator {
- public:
-  explicit ChunkedAllocator(::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
-
-  /// Frees all chunks of memory and subtracts the total allocated bytes
-  /// from the registered limits.
-  ~ChunkedAllocator();
-
-  /// Allocates 8-byte aligned section of memory of 'size' bytes at the end
-  /// of the current chunk. Creates a new chunk if there aren't any chunks
-  /// with enough capacity.
-  uint8_t* Allocate(int size);
-
-  /// Returns 'byte_size' bytes from the current chunk back to the mem pool. This can
-  /// only be used to return either all or part of the previous allocation returned
-  /// by Allocate().
-  void ReturnPartialAllocation(int byte_size);
-
-  /// Makes all allocated chunks available for re-use, but doesn't delete any chunks.
-  void Clear();
-
-  /// Deletes all allocated chunks. FreeAll() or AcquireData() must be called for
-  /// each mem pool.
-  void FreeAll();
-
-  /// Absorb all chunks that hold data from src. If keep_current is true, let src hold on
-  /// to its last allocated chunk that contains data.
-  /// All offsets handed out by calls to GetCurrentOffset() for 'src' become invalid.
-  void AcquireData(ChunkedAllocator* src, bool keep_current);
-
-  std::string DebugString();
-
-  int64_t total_allocated_bytes() const { return total_allocated_bytes_; }
-  int64_t peak_allocated_bytes() const { return peak_allocated_bytes_; }
-  int64_t total_reserved_bytes() const { return total_reserved_bytes_; }
-
-  /// Return sum of chunk_sizes_.
-  int64_t GetTotalChunkSizes() const;
-
- private:
-  friend class ChunkedAllocatorTest;
-  static const int INITIAL_CHUNK_SIZE = 4 * 1024;
-
-  /// The maximum size of chunk that should be allocated. Allocations larger than this
-  /// size will get their own individual chunk.
-  static const int MAX_CHUNK_SIZE = 1024 * 1024;
-
-  struct ChunkInfo {
-    uint8_t* data;  // Owned by the ChunkInfo.
-    int64_t size;   // in bytes
-
-    /// bytes allocated via Allocate() in this chunk
-    int64_t allocated_bytes;
-
-    explicit ChunkInfo(int64_t size, uint8_t* buf);
-
-    ChunkInfo() : data(NULLPTR), size(0), allocated_bytes(0) {}
-  };
-
-  /// chunk from which we served the last Allocate() call;
-  /// always points to the last chunk that contains allocated data;
-  /// chunks 0..current_chunk_idx_ are guaranteed to contain data
-  /// (chunks_[i].allocated_bytes > 0 for i: 0..current_chunk_idx_);
-  /// -1 if no chunks present
-  int current_chunk_idx_;
-
-  /// The size of the next chunk to allocate.
-  int64_t next_chunk_size_;
-
-  /// sum of allocated_bytes_
-  int64_t total_allocated_bytes_;
-
-  /// Maximum number of bytes allocated from this pool at one time.
-  int64_t peak_allocated_bytes_;
-
-  /// sum of all bytes allocated in chunks_
-  int64_t total_reserved_bytes_;
-
-  std::vector<ChunkInfo> chunks_;
-
-  ::arrow::MemoryPool* pool_;
-
-  /// Find or allocate a chunk with at least min_size spare capacity and update
-  /// current_chunk_idx_. Also updates chunks_, chunk_sizes_ and allocated_bytes_
-  /// if a new chunk needs to be created.
-  bool FindChunk(int64_t min_size);
-
-  /// Check integrity of the supporting data structures; always returns true but DCHECKs
-  /// all invariants.
-  /// If 'current_chunk_empty' is false, checks that the current chunk contains data.
-  bool CheckIntegrity(bool current_chunk_empty);
-
-  /// Return offset to unoccupied space in current chunk.
-  int GetFreeOffset() const {
-    if (current_chunk_idx_ == -1) return 0;
-    return static_cast<int>(chunks_[current_chunk_idx_].allocated_bytes);
-  }
-
-  template <bool CHECK_LIMIT_FIRST>
-  uint8_t* Allocate(int size);
-};
-
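A compact model of the arithmetic in the class comment above: 1024 requests of 17 bytes occupy 24 aligned bytes each, i.e. 24K of allocations spread over 4K + 8K + 16K = 28K of chunks. ToyArena is an illustration of the documented accounting only (free-chunk reuse omitted), not the removed class:

#include <algorithm>
#include <cassert>
#include <cstdint>

// Requests are rounded up to 8 bytes; chunks start at 4K and double up to a
// 1M cap; a new chunk is created only when the current one cannot fit the
// rounded request.
struct ToyArena {
  int64_t chunk_size = 0;         // capacity of the current chunk
  int64_t chunk_used = 0;         // bytes handed out from the current chunk
  int64_t next_chunk_size = 4 * 1024;
  int64_t total_allocated = 0;    // sum of rounded-up allocations
  int64_t total_chunk_bytes = 0;  // sum of all chunk capacities

  void Allocate(int64_t size) {
    int64_t rounded = (size + 7) & ~int64_t{7};
    if (chunk_used + rounded > chunk_size) {
      chunk_size = std::max(rounded, next_chunk_size);
      next_chunk_size = std::min<int64_t>(chunk_size * 2, 1024 * 1024);
      total_chunk_bytes += chunk_size;
      chunk_used = 0;
    }
    chunk_used += rounded;
    total_allocated += rounded;
  }
};

int main() {
  ToyArena arena;
  for (int i = 0; i < 1024; ++i) {
    arena.Allocate(17);  // each request occupies 24 aligned bytes
  }
  assert(arena.total_allocated == 24 * 1024);    // 1024 * 24
  assert(arena.total_chunk_bytes == 28 * 1024);  // 4K + 8K + 16K
  return 0;
}
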
 
 // File input and output interfaces that translate arrow::Status to exceptions
 
 class PARQUET_EXPORT FileInterface {
diff --git a/cpp/src/parquet/util/test-common.h b/cpp/src/parquet/util/test-common.h
index 4e95870025cda..cb4eb43be2a80 100644
--- a/cpp/src/parquet/util/test-common.h
+++ b/cpp/src/parquet/util/test-common.h
@@ -91,40 +91,33 @@ static vector<T> slice(const vector<T>& values, int start, int end) {
 }
 
 static inline vector<bool> flip_coins_seed(int n, double p, uint32_t seed) {
-  std::mt19937 gen(seed);
+  std::default_random_engine gen(seed);
 
   std::bernoulli_distribution d(p);
 
-  vector<bool> draws;
+  vector<bool> draws(n);
   for (int i = 0; i < n; ++i) {
-    draws.push_back(d(gen));
+    draws[i] = d(gen);
   }
   return draws;
 }
 
 static inline vector<bool> flip_coins(int n, double p) {
   uint64_t seed = std::chrono::high_resolution_clock::now().time_since_epoch().count();
-  std::mt19937 gen(static_cast<uint32_t>(seed));
-
-  std::bernoulli_distribution d(p);
-
-  vector<bool> draws;
-  for (int i = 0; i < n; ++i) {
-    draws.push_back(d(gen));
-  }
-  return draws;
+  return flip_coins_seed(n, p, static_cast<uint32_t>(seed));
 }
 
 void random_bytes(int n, uint32_t seed, std::vector<uint8_t>* out) {
-  std::mt19937 gen(seed);
+  std::default_random_engine gen(seed);
   std::uniform_int_distribution<int> d(0, 255);
 
+  out->resize(n);
   for (int i = 0; i < n; ++i) {
-    out->push_back(static_cast<uint8_t>(d(gen) & 0xFF));
+    (*out)[i] = static_cast<uint8_t>(d(gen));
   }
 }
 
 void random_bools(int n, double p, uint32_t seed, bool* out) {
-  std::mt19937 gen(seed);
+  std::default_random_engine gen(seed);
   std::bernoulli_distribution d(p);
   for (int i = 0; i < n; ++i) {
     out[i] = d(gen);
@@ -133,7 +126,7 @@ void random_bools(int n, double p, uint32_t seed, bool* out) {
 
 template <typename T>
 void random_numbers(int n, uint32_t seed, T min_value, T max_value, T* out) {
-  std::mt19937 gen(seed);
+  std::default_random_engine gen(seed);
   std::uniform_int_distribution<T> d(min_value, max_value);
   for (int i = 0; i < n; ++i) {
     out[i] = d(gen);
@@ -142,7 +135,7 @@ void random_numbers(int n, uint32_t seed, T min_value, T max_value, T* out) {
 
 template <>
 void random_numbers(int n, uint32_t seed, float min_value, float max_value, float* out) {
-  std::mt19937 gen(seed);
+  std::default_random_engine gen(seed);
   std::uniform_real_distribution<float> d(min_value, max_value);
   for (int i = 0; i < n; ++i) {
     out[i] = d(gen);
@@ -152,7 +145,7 @@ void random_numbers(int n, uint32_t seed, float min_value, float max_value, floa
 
 template <>
 void random_numbers(int n, uint32_t seed, double min_value, double max_value,
                     double* out) {
-  std::mt19937 gen(seed);
+  std::default_random_engine gen(seed);
   std::uniform_real_distribution<double> d(min_value, max_value);
   for (int i = 0; i < n; ++i) {
     out[i] = d(gen);
@@ -161,7 +154,7 @@ void random_numbers(int n, uint32_t seed, double min_value, double max_value,
 
 void random_Int96_numbers(int n, uint32_t seed, int32_t min_value, int32_t max_value,
                           Int96* out) {
-  std::mt19937 gen(seed);
+  std::default_random_engine gen(seed);
   std::uniform_int_distribution<int32_t> d(min_value, max_value);
   for (int i = 0; i < n; ++i) {
     out[i].value[0] = d(gen);
@@ -171,12 +164,12 @@ void random_Int96_numbers(int n, uint32_t seed, int32_t min_value, int32_t max_v
 }
 
 void random_fixed_byte_array(int n, uint32_t seed, uint8_t* buf, int len, FLBA* out) {
-  std::mt19937 gen(seed);
+  std::default_random_engine gen(seed);
   std::uniform_int_distribution<int> d(0, 255);
   for (int i = 0; i < n; ++i) {
     out[i].ptr = buf;
     for (int j = 0; j < len; ++j) {
-      buf[j] = static_cast<uint8_t>(d(gen) & 0xFF);
+      buf[j] = static_cast<uint8_t>(d(gen));
     }
     buf += len;
   }
@@ -184,7 +177,7 @@ void random_fixed_byte_array(int n, uint32_t seed, uint8_t* buf, int len, FLBA*
 
 void random_byte_array(int n, uint32_t seed, uint8_t* buf, ByteArray* out, int min_size,
                        int max_size) {
-  std::mt19937 gen(seed);
+  std::default_random_engine gen(seed);
   std::uniform_int_distribution<int> d1(min_size, max_size);
   std::uniform_int_distribution<int> d2(0, 255);
   for (int i = 0; i < n; ++i) {
@@ -192,7 +185,7 @@ void random_byte_array(int n, uint32_t seed, uint8_t* buf, ByteArray* out, int m
     out[i].len = len;
     out[i].ptr = buf;
     for (int j = 0; j < len; ++j) {
-      buf[j] = static_cast<uint8_t>(d2(gen) & 0xFF);
+      buf[j] = static_cast<uint8_t>(d2(gen));
     }
     buf += len;
   }
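These helpers now seed std::default_random_engine directly and preallocate their output, and flip_coins() reuses flip_coins_seed() instead of duplicating the loop. The property the test helpers rely on is that a fixed seed reproduces the same draws; a minimal standalone check of that assumption (the draw function is made up for the demo):

#include <cassert>
#include <cstdint>
#include <random>
#include <vector>

// Two engines seeded identically must produce identical sequences; this is
// what lets the helpers above thread a `seed` through for reproducibility.
std::vector<int> draw(uint32_t seed, int n) {
  std::default_random_engine gen(seed);
  std::uniform_int_distribution<int> d(0, 255);
  std::vector<int> out(n);
  for (int i = 0; i < n; ++i) {
    out[i] = d(gen);
  }
  return out;
}

int main() {
  assert(draw(42, 16) == draw(42, 16));  // deterministic for a fixed seed
  return 0;
}
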
diff --git a/cpp/src/parquet/util/visibility.h b/cpp/src/parquet/util/visibility.h
index 929d3b22c8851..d731bad6ae47f 100644
--- a/cpp/src/parquet/util/visibility.h
+++ b/cpp/src/parquet/util/visibility.h
@@ -19,7 +19,8 @@
 #define PARQUET_UTIL_VISIBILITY_H
 
 #if defined(_WIN32) || defined(__CYGWIN__)
-#ifdef _MSC_VER
+
+#if defined(_MSC_VER)
 #pragma warning(push)
 // Disable warning for STL types usage in DLL interface
 // https://web.archive.org/web/20130317015847/http://connect.microsoft.com/VisualStudio/feedback/details/696593/vc-10-vs-2010-basic-string-exports
@@ -30,9 +31,20 @@
 #pragma warning(disable : 4005)
 // Disable extern before exported template warnings
 #pragma warning(disable : 4910)
+#else
+#pragma GCC diagnostic ignored "-Wattributes"
 #endif
+
+#ifdef PARQUET_STATIC
+#define PARQUET_EXPORT
+#elif defined(PARQUET_EXPORTING)
 #define PARQUET_EXPORT __declspec(dllexport)
+#else
+#define PARQUET_EXPORT __declspec(dllimport)
+#endif
+
 #define PARQUET_NO_EXPORT
+
 #else  // Not Windows
 #ifndef PARQUET_EXPORT
 #define PARQUET_EXPORT __attribute__((visibility("default")))
diff --git a/cpp/src/plasma/CMakeLists.txt b/cpp/src/plasma/CMakeLists.txt
index f9ed4e3d4e3f5..53af8c531aad8 100644
--- a/cpp/src/plasma/CMakeLists.txt
+++ b/cpp/src/plasma/CMakeLists.txt
@@ -15,31 +15,30 @@
 # specific language governing permissions and limitations
 # under the License.
-cmake_minimum_required(VERSION 3.2) +add_custom_target(plasma-all) +add_custom_target(plasma) +add_custom_target(plasma-benchmarks) +add_custom_target(plasma-tests) +add_dependencies(plasma-all plasma plasma-tests plasma-benchmarks) # For the moment, Plasma is versioned like Arrow project(plasma VERSION "${ARROW_BASE_VERSION}") +set(PLASMA_VERSION "${ARROW_VERSION}") -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/../python/cmake_modules") - -find_package(PythonLibsNew REQUIRED) find_package(Threads) # The SO version is also the ABI version set(PLASMA_SO_VERSION "${ARROW_SO_VERSION}") set(PLASMA_FULL_SO_VERSION "${ARROW_FULL_SO_VERSION}") -include_directories(SYSTEM ${PYTHON_INCLUDE_DIRS}) include_directories("${FLATBUFFERS_INCLUDE_DIR}" "${CMAKE_CURRENT_LIST_DIR}/" "${CMAKE_CURRENT_LIST_DIR}/thirdparty/" "${CMAKE_CURRENT_LIST_DIR}/../") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-conversion") # Compile flatbuffers set(PLASMA_FBS_SRC "${CMAKE_CURRENT_LIST_DIR}/format/plasma.fbs" "${CMAKE_CURRENT_LIST_DIR}/format/common.fbs") -set(OUTPUT_DIR ${CMAKE_BINARY_DIR}/src/plasma) +set(OUTPUT_DIR ${ARROW_BINARY_DIR}/src/plasma) set(PLASMA_FBS_OUTPUT_FILES "${OUTPUT_DIR}/common_generated.h" @@ -77,16 +76,17 @@ set(PLASMA_SRCS io.cc malloc.cc plasma.cc + plasma_allocator.cc protocol.cc thirdparty/ae/ae.c) set(PLASMA_LINK_LIBS arrow_shared) set(PLASMA_STATIC_LINK_LIBS arrow_static) -if (ARROW_GPU) - set(PLASMA_LINK_LIBS ${PLASMA_LINK_LIBS} arrow_gpu_shared) - set(PLASMA_STATIC_LINK_LIBS arrow_gpu_static ${PLASMA_STATIC_LINK_LIBS}) - add_definitions(-DPLASMA_GPU) +if (ARROW_CUDA) + set(PLASMA_LINK_LIBS ${PLASMA_LINK_LIBS} arrow_cuda_shared) + set(PLASMA_STATIC_LINK_LIBS arrow_cuda_static ${PLASMA_STATIC_LINK_LIBS}) + add_definitions(-DPLASMA_CUDA) endif() ADD_ARROW_LIB(plasma @@ -96,6 +96,8 @@ ADD_ARROW_LIB(plasma SHARED_LINK_LIBS ${FLATBUFFERS_STATIC_LIB} ${CMAKE_THREAD_LIBS_INIT} ${PLASMA_LINK_LIBS} STATIC_LINK_LIBS ${FLATBUFFERS_STATIC_LIB} ${CMAKE_THREAD_LIBS_INIT} ${PLASMA_STATIC_LINK_LIBS}) +add_dependencies(plasma ${PLASMA_LIBRARIES}) + foreach(LIB_TARGET ${PLASMA_LIBRARIES}) target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_EXPORTING) @@ -127,6 +129,7 @@ endif() # be copied around and used in different locations. 
add_executable(plasma_store_server store.cc) target_link_libraries(plasma_store_server plasma_static ${PLASMA_STATIC_LINK_LIBS}) +add_dependencies(plasma plasma_store_server) if (ARROW_RPATH_ORIGIN) if (APPLE) @@ -138,7 +141,6 @@ if (ARROW_RPATH_ORIGIN) INSTALL_RPATH ${_lib_install_rpath}) endif() -# Headers: top level install(FILES common.h compat.h @@ -149,15 +151,12 @@ install(FILES # Plasma store set_target_properties(plasma_store_server PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE) -install(TARGETS plasma_store_server DESTINATION ${CMAKE_INSTALL_BINDIR}) +install(TARGETS plasma_store_server + ${INSTALL_IS_OPTIONAL} + DESTINATION ${CMAKE_INSTALL_BINDIR}) # pkg-config support -configure_file(plasma.pc.in - "${CMAKE_CURRENT_BINARY_DIR}/plasma.pc" - @ONLY) -install( - FILES "${CMAKE_CURRENT_BINARY_DIR}/plasma.pc" - DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig/") +ARROW_ADD_PKG_CONFIG("plasma") if(ARROW_PLASMA_JAVA_CLIENT) # Plasma java client support @@ -198,8 +197,20 @@ endif() # Unit tests ####################################### -ADD_ARROW_TEST(test/serialization_tests +# Adding unit tests part of the "arrow" portion of the test suite +function(ADD_PLASMA_TEST REL_TEST_NAME) + set(options) + set(one_value_args) + set(multi_value_args) + cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN}) + ADD_TEST_CASE(${REL_TEST_NAME} + PREFIX "plasma" + LABELS "plasma-tests" + ${ARG_UNPARSED_ARGUMENTS}) +endfunction() + +ADD_PLASMA_TEST(test/serialization_tests EXTRA_LINK_LIBS plasma_shared ${PLASMA_LINK_LIBS}) -ADD_ARROW_TEST(test/client_tests +ADD_PLASMA_TEST(test/client_tests EXTRA_LINK_LIBS plasma_shared ${PLASMA_LINK_LIBS} EXTRA_DEPENDENCIES plasma_store_server) diff --git a/cpp/src/plasma/client.cc b/cpp/src/plasma/client.cc index 0c96be060e1c1..f08d6efd71ee7 100644 --- a/cpp/src/plasma/client.cc +++ b/cpp/src/plasma/client.cc @@ -53,18 +53,18 @@ #include "plasma/plasma.h" #include "plasma/protocol.h" -#ifdef PLASMA_GPU +#ifdef PLASMA_CUDA #include "arrow/gpu/cuda_api.h" -using arrow::gpu::CudaBuffer; -using arrow::gpu::CudaBufferWriter; -using arrow::gpu::CudaContext; -using arrow::gpu::CudaDeviceManager; +using arrow::cuda::CudaBuffer; +using arrow::cuda::CudaBufferWriter; +using arrow::cuda::CudaContext; +using arrow::cuda::CudaDeviceManager; #endif #define XXH_INLINE_ALL 1 #define XXH_NAMESPACE plasma_client_ -#include "arrow/util/xxhash/xxhash.h" +#include "arrow/vendored/xxhash/xxhash.h" #define XXH64_DEFAULT_SEED 0 @@ -83,13 +83,10 @@ typedef struct XXH64_state_s XXH64_state_t; constexpr int64_t kHashingConcurrency = 8; constexpr int64_t kBytesInMB = 1 << 20; -// Use 100MB as an overestimate of the L3 cache size. -constexpr int64_t kL3CacheSizeBytes = 100000000; - // ---------------------------------------------------------------------- // GPU support -#ifdef PLASMA_GPU +#ifdef PLASMA_CUDA struct GpuProcessHandle { /// Pointer to CUDA buffer that is backing this GPU object. std::shared_ptr ptr; @@ -143,22 +140,13 @@ struct ObjectInUseEntry { bool is_sealed; }; -/// Configuration options for the plasma client. -struct PlasmaClientConfig { - /// Number of release calls we wait until the object is actually released. - /// This allows us to avoid invalidating the cpu cache on workers if objects - /// are reused accross tasks. - size_t release_delay; -}; - struct ClientMmapTableEntry { + /// The associated file descriptor on the client. + int fd; /// The result of mmap for this file descriptor. uint8_t* pointer; /// The length of the memory-mapped file. 
size_t length; - /// The number of objects in this memory-mapped file that are currently being - /// used by the client. When this count reaches zeros, we unmap the file. - int count; }; class PlasmaClient::Impl : public std::enable_shared_from_this { @@ -169,7 +157,7 @@ class PlasmaClient::Impl : public std::enable_shared_from_this> objects_in_use_; - /// Object IDs of the last few release calls. This is a deque and - /// is used to delay releasing objects to see if they can be reused by - /// subsequent tasks so we do not unneccessarily invalidate cpu caches. - /// TODO(pcm): replace this with a proper lru cache using the size of the L3 - /// cache. - std::deque release_history_; - /// The number of bytes in the combined objects that are held in the release - /// history doubly-linked list. If this is too large then the client starts - /// releasing objects. - int64_t in_use_object_bytes_; - /// Configuration options for the plasma client. - PlasmaClientConfig config_; /// The amount of memory available to the Plasma store. The client needs this /// information to make sure that it does not delay in releasing so much /// memory that the store is unable to evict enough objects to free up space. @@ -283,16 +253,16 @@ class PlasmaClient::Impl : public std::enable_shared_from_this deletion_cache_; -#ifdef PLASMA_GPU +#ifdef PLASMA_CUDA /// Cuda Device Manager. - arrow::gpu::CudaDeviceManager* manager_; + arrow::cuda::CudaDeviceManager* manager_; #endif }; PlasmaBuffer::~PlasmaBuffer() { ARROW_UNUSED(client_->Release(object_id_)); } -PlasmaClient::Impl::Impl() { -#ifdef PLASMA_GPU +PlasmaClient::Impl::Impl() : store_conn_(0), store_capacity_(0) { +#ifdef PLASMA_CUDA DCHECK_OK(CudaDeviceManager::GetInstance(&manager_)); #endif } @@ -305,7 +275,6 @@ PlasmaClient::Impl::~Impl() {} uint8_t* PlasmaClient::Impl::LookupOrMmap(int fd, int store_fd_val, int64_t map_size) { auto entry = mmap_table_.find(store_fd_val); if (entry != mmap_table_.end()) { - close(fd); return entry->second.pointer; } else { // We subtract kMmapRegionsGap from the length that was added @@ -319,9 +288,9 @@ uint8_t* PlasmaClient::Impl::LookupOrMmap(int fd, int store_fd_val, int64_t map_ close(fd); // Closing this fd has an effect on performance. ClientMmapTableEntry& entry = mmap_table_[store_fd_val]; + entry.fd = fd; entry.pointer = result; entry.length = map_size; - entry.count = 0; return result; } } @@ -339,6 +308,17 @@ bool PlasmaClient::Impl::IsInUse(const ObjectID& object_id) { return (elem != objects_in_use_.end()); } +int PlasmaClient::Impl::GetStoreFd(int store_fd) { + auto entry = mmap_table_.find(store_fd); + if (entry == mmap_table_.end()) { + int fd = recv_fd(store_conn_); + ARROW_CHECK(fd >= 0) << "recv not successful"; + return fd; + } else { + return entry->second.fd; + } +} + void PlasmaClient::Impl::IncrementObjectCount(const ObjectID& object_id, PlasmaObject* object, bool is_sealed) { // Increment the count of the object to track the fact that it is being used. @@ -354,18 +334,6 @@ void PlasmaClient::Impl::IncrementObjectCount(const ObjectID& object_id, objects_in_use_[object_id]->count = 0; objects_in_use_[object_id]->is_sealed = is_sealed; object_entry = objects_in_use_[object_id].get(); - if (object->device_num == 0) { - // Increment the count of the number of objects in the memory-mapped file - // that are being used. The corresponding decrement should happen in - // PlasmaClient::Release. 
- auto entry = mmap_table_.find(object->store_fd); - ARROW_CHECK(entry != mmap_table_.end()); - ARROW_CHECK(entry->second.count >= 0); - // Update the in_use_object_bytes_. - in_use_object_bytes_ += - (object_entry->object.data_size + object_entry->object.metadata_size); - entry->second.count += 1; - } } else { object_entry = elem->second.get(); ARROW_CHECK(object_entry->count > 0); @@ -394,8 +362,7 @@ Status PlasmaClient::Impl::Create(const ObjectID& object_id, int64_t data_size, // If the CreateReply included an error, then the store will not send a file // descriptor. if (device_num == 0) { - int fd = recv_fd(store_conn_); - ARROW_CHECK(fd >= 0) << "recv not successful"; + int fd = GetStoreFd(store_fd); ARROW_CHECK(object.data_size == data_size); ARROW_CHECK(object.metadata_size == metadata_size); // The metadata should come right after the data. @@ -410,7 +377,7 @@ Status PlasmaClient::Impl::Create(const ObjectID& object_id, int64_t data_size, memcpy((*data)->mutable_data() + object.data_size, metadata, metadata_size); } } else { -#ifdef PLASMA_GPU +#ifdef PLASMA_CUDA std::lock_guard lock(gpu_mutex); std::shared_ptr context; RETURN_NOT_OK(manager_->GetContext(device_num - 1, &context)); @@ -494,7 +461,7 @@ Status PlasmaClient::Impl::GetBuffers( physical_buf = std::make_shared( data + object->data_offset, object->data_size + object->metadata_size); } else { -#ifdef PLASMA_GPU +#ifdef PLASMA_CUDA physical_buf = gpu_object_map.find(object_ids[i])->second->ptr; #else ARROW_LOG(FATAL) << "Arrow GPU library is not enabled."; @@ -532,8 +499,7 @@ Status PlasmaClient::Impl::GetBuffers( // in the subsequent loop based on just the store file descriptor and without // having to know the relevant file descriptor received from recv_fd. for (size_t i = 0; i < store_fds.size(); i++) { - int fd = recv_fd(store_conn_); - ARROW_CHECK(fd >= 0); + int fd = GetStoreFd(store_fds[i]); LookupOrMmap(fd, store_fds[i], mmap_sizes[i]); } @@ -557,7 +523,7 @@ Status PlasmaClient::Impl::GetBuffers( physical_buf = std::make_shared( data + object->data_offset, object->data_size + object->metadata_size); } else { -#ifdef PLASMA_GPU +#ifdef PLASMA_CUDA std::lock_guard lock(gpu_mutex); auto handle = gpu_object_map.find(object_ids[i]); if (handle == gpu_object_map.end()) { @@ -612,54 +578,21 @@ Status PlasmaClient::Impl::Get(const ObjectID* object_ids, int64_t num_objects, return GetBuffers(object_ids, num_objects, timeout_ms, wrap_buffer, out); } -Status PlasmaClient::Impl::UnmapObject(const ObjectID& object_id) { +Status PlasmaClient::Impl::MarkObjectUnused(const ObjectID& object_id) { auto object_entry = objects_in_use_.find(object_id); ARROW_CHECK(object_entry != objects_in_use_.end()); ARROW_CHECK(object_entry->second->count == 0); - // Decrement the count of the number of objects in this memory-mapped file - // that the client is using. The corresponding increment should have - // happened in plasma_get. - int fd = object_entry->second->object.store_fd; - auto entry = mmap_table_.find(fd); - ARROW_CHECK(entry != mmap_table_.end()); - ARROW_CHECK(entry->second.count >= 1); - if (entry->second.count == 1) { - // If no other objects are being used, then unmap the file. - // We subtract kMmapRegionsGap from the length that was added - // in fake_mmap in malloc.h, to make the size page-aligned again. - int err = munmap(entry->second.pointer, entry->second.length - kMmapRegionsGap); - if (err == -1) { - return Status::IOError("Error during munmap"); - } - // Remove the corresponding entry from the hash table. 
- mmap_table_.erase(fd); - } else { - // If there are other objects being used, decrement the reference count. - entry->second.count -= 1; - } - // Update the in_use_object_bytes_. - in_use_object_bytes_ -= (object_entry->second->object.data_size + - object_entry->second->object.metadata_size); - DCHECK_GE(in_use_object_bytes_, 0); // Remove the entry from the hash table of objects currently in use. objects_in_use_.erase(object_id); return Status::OK(); } -/// This is a helper method for implementing plasma_release. We maintain a -/// buffer -/// of release calls and only perform them once the buffer becomes full (as -/// judged by the aggregate sizes of the objects). There may be multiple release -/// calls for the same object ID in the buffer. In this case, the first release -/// calls will not do anything. The client will only send a message to the store -/// releasing the object when the client is truly done with the object. -/// -/// @param object_id The object ID to attempt to release. -Status PlasmaClient::Impl::PerformRelease(const ObjectID& object_id) { - // Decrement the count of the number of instances of this object that are - // being used by this client. The corresponding increment should have happened - // in PlasmaClient::Get. +Status PlasmaClient::Impl::Release(const ObjectID& object_id) { + // If the client is already disconnected, ignore release requests. + if (store_conn_ < 0) { + return Status::OK(); + } auto object_entry = objects_in_use_.find(object_id); ARROW_CHECK(object_entry != objects_in_use_.end()); object_entry->second->count -= 1; @@ -667,7 +600,7 @@ Status PlasmaClient::Impl::PerformRelease(const ObjectID& object_id) { // Check if the client is no longer using this object. if (object_entry->second->count == 0) { // Tell the store that the client no longer needs the object. - RETURN_NOT_OK(UnmapObject(object_id)); + RETURN_NOT_OK(MarkObjectUnused(object_id)); RETURN_NOT_OK(SendReleaseRequest(store_conn_, object_id)); auto iter = deletion_cache_.find(object_id); if (iter != deletion_cache_.end()) { @@ -678,50 +611,6 @@ Status PlasmaClient::Impl::PerformRelease(const ObjectID& object_id) { return Status::OK(); } -Status PlasmaClient::Impl::Release(const ObjectID& object_id) { - // If the client is already disconnected, ignore release requests. - if (store_conn_ < 0) { - return Status::OK(); - } - // If an object is in the deletion cache, handle it directly without waiting. - auto iter = deletion_cache_.find(object_id); - if (iter != deletion_cache_.end()) { - RETURN_NOT_OK(PerformRelease(object_id)); - return Status::OK(); - } - // Add the new object to the release history. - release_history_.push_front(object_id); - // If there are too many bytes in use by the client or if there are too many - // pending release calls, and there are at least some pending release calls in - // the release_history list, then release some objects. - - // TODO(wap): Eviction policy only works on host memory, and thus objects on - // the GPU cannot be released currently. - while ((in_use_object_bytes_ > std::min(kL3CacheSizeBytes, store_capacity_ / 100) || - release_history_.size() > config_.release_delay) && - release_history_.size() > 0) { - // Perform a release for the object ID for the first pending release. - RETURN_NOT_OK(PerformRelease(release_history_.back())); - // Remove the last entry from the release history. 
- release_history_.pop_back(); - } - return Status::OK(); -} - -Status PlasmaClient::Impl::FlushReleaseHistory() { - // If the client is already disconnected, ignore the flush. - if (store_conn_ < 0) { - return Status::OK(); - } - while (release_history_.size() > 0) { - // Perform a release for the object ID for the first pending release. - RETURN_NOT_OK(PerformRelease(release_history_.back())); - // Remove the last entry from the release history. - release_history_.pop_back(); - } - return Status::OK(); -} - // This method is used to query whether the plasma store contains an object. Status PlasmaClient::Impl::Contains(const ObjectID& object_id, bool* has_object) { // Check if we already have a reference to the object. @@ -852,8 +741,6 @@ Status PlasmaClient::Impl::Abort(const ObjectID& object_id) { ARROW_CHECK(!object_entry->second->is_sealed) << "Plasma client called abort on a sealed object"; - // Flush the release history. - RETURN_NOT_OK(FlushReleaseHistory()); // Make sure that the Plasma client only has one reference to the object. If // it has more, then the client needs to release the buffer before calling // abort. @@ -865,7 +752,7 @@ Status PlasmaClient::Impl::Abort(const ObjectID& object_id) { RETURN_NOT_OK(SendAbortRequest(store_conn_, object_id)); // Decrease the reference count to zero, then remove the object. object_entry->second->count--; - RETURN_NOT_OK(UnmapObject(object_id)); + RETURN_NOT_OK(MarkObjectUnused(object_id)); std::vector buffer; ObjectID id; @@ -875,7 +762,6 @@ Status PlasmaClient::Impl::Abort(const ObjectID& object_id) { } Status PlasmaClient::Impl::Delete(const std::vector& object_ids) { - RETURN_NOT_OK(FlushReleaseHistory()); std::vector not_in_use_ids; for (auto& object_id : object_ids) { // If the object is in used, skip it. 
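With the release-history buffer removed, Release() becomes plain reference counting: decrement the per-object count and tell the store as soon as it reaches zero. A toy model of that bookkeeping (names are illustrative; the store message is stubbed out):

#include <cassert>
#include <unordered_map>

// Minimal model of the new Release() path: per-object reference counts,
// with the store notified exactly once, when the count reaches zero.
struct ToyClient {
  std::unordered_map<int, int> ref_counts;  // object id -> count
  int release_messages_sent = 0;

  void Get(int id) { ++ref_counts[id]; }

  void Release(int id) {
    auto it = ref_counts.find(id);
    assert(it != ref_counts.end());
    if (--it->second == 0) {
      ref_counts.erase(it);
      ++release_messages_sent;  // stand-in for SendReleaseRequest()
    }
  }
};

int main() {
  ToyClient c;
  c.Get(7);
  c.Get(7);      // same object in use twice
  c.Release(7);  // count drops to 1: store not notified yet
  c.Release(7);  // count drops to 0: store notified
  assert(c.release_messages_sent == 1);
  return 0;
}
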
@@ -943,13 +829,10 @@ Status PlasmaClient::Impl::Subscribe(int* fd) {
   return Status::OK();
 }
 
-Status PlasmaClient::Impl::GetNotification(int fd, ObjectID* object_id,
-                                           int64_t* data_size, int64_t* metadata_size) {
-  auto notification = ReadMessageAsync(fd);
-  if (notification == NULL) {
-    return Status::IOError("Failed to read object notification from Plasma socket");
-  }
-  auto object_info = flatbuffers::GetRoot<fb::ObjectInfo>(notification.get());
+Status PlasmaClient::Impl::DecodeNotification(const uint8_t* buffer, ObjectID* object_id,
+                                              int64_t* data_size,
+                                              int64_t* metadata_size) {
+  auto object_info = flatbuffers::GetRoot<fb::ObjectInfo>(buffer);
   ARROW_CHECK(object_info->object_id()->size() == sizeof(ObjectID));
   memcpy(object_id, object_info->object_id()->data(), sizeof(ObjectID));
   if (object_info->is_deletion()) {
@@ -962,18 +845,26 @@ Status PlasmaClient::Impl::GetNotification(int fd, ObjectID* object_id,
   return Status::OK();
 }
 
+Status PlasmaClient::Impl::GetNotification(int fd, ObjectID* object_id,
+                                           int64_t* data_size, int64_t* metadata_size) {
+  auto notification = ReadMessageAsync(fd);
+  if (notification == NULL) {
+    return Status::IOError("Failed to read object notification from Plasma socket");
+  }
+  return DecodeNotification(notification.get(), object_id, data_size, metadata_size);
+}
+
 Status PlasmaClient::Impl::Connect(const std::string& store_socket_name,
                                    const std::string& manager_socket_name,
                                    int release_delay, int num_retries) {
   RETURN_NOT_OK(ConnectIpcSocketRetry(store_socket_name, num_retries, -1, &store_conn_));
   if (manager_socket_name != "") {
-    RETURN_NOT_OK(
-        ConnectIpcSocketRetry(manager_socket_name, num_retries, -1, &manager_conn_));
-  } else {
-    manager_conn_ = -1;
+    return Status::NotImplemented("plasma manager is no longer supported");
+  }
+  if (release_delay != 0) {
+    ARROW_LOG(WARNING) << "The release_delay parameter in PlasmaClient::Connect "
+                       << "is deprecated";
   }
-  config_.release_delay = release_delay;
-  in_use_object_bytes_ = 0;
 
   // Send a ConnectRequest to the store to get its memory capacity.
   RETURN_NOT_OK(SendConnectRequest(store_conn_));
   std::vector<uint8_t> buffer;
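Splitting DecodeNotification() out of GetNotification() lets a caller that already read the notification bytes through its own event loop decode them without handing plasma a file descriptor. A sketch of such a caller, assuming the PlasmaClient API shown in this patch:

#include <cstdint>

#include "plasma/client.h"

// A caller that owns its own event loop can decode raw notification bytes
// directly instead of letting the client read from the socket.
arrow::Status HandleNotification(plasma::PlasmaClient* client, const uint8_t* buf) {
  plasma::ObjectID object_id;
  int64_t data_size = 0;
  int64_t metadata_size = 0;
  // Deletions are signalled through the size out-parameters (see the decode
  // body above).
  return client->DecodeNotification(buf, &object_id, &data_size, &metadata_size);
}
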
@@ -991,78 +882,6 @@ Status PlasmaClient::Impl::Disconnect() {
   // that were in use by us when handling the SIGPIPE.
   close(store_conn_);
   store_conn_ = -1;
-  if (manager_conn_ >= 0) {
-    close(manager_conn_);
-    manager_conn_ = -1;
-  }
-  return Status::OK();
-}
-
-Status PlasmaClient::Impl::Transfer(const char* address, int port,
-                                    const ObjectID& object_id) {
-  return SendDataRequest(manager_conn_, object_id, address, port);
-}
-
-Status PlasmaClient::Impl::Fetch(int num_object_ids, const ObjectID* object_ids) {
-  ARROW_CHECK(manager_conn_ >= 0);
-  return SendFetchRequest(manager_conn_, object_ids, num_object_ids);
-}
-
-int PlasmaClient::Impl::get_manager_fd() const { return manager_conn_; }
-
-Status PlasmaClient::Impl::Info(const ObjectID& object_id, int* object_status) {
-  ARROW_CHECK(manager_conn_ >= 0);
-
-  RETURN_NOT_OK(SendStatusRequest(manager_conn_, &object_id, 1));
-  std::vector<uint8_t> buffer;
-  RETURN_NOT_OK(PlasmaReceive(manager_conn_, MessageType::PlasmaStatusReply, &buffer));
-  ObjectID id;
-  RETURN_NOT_OK(ReadStatusReply(buffer.data(), buffer.size(), &id, object_status, 1));
-  ARROW_CHECK(object_id == id);
-  return Status::OK();
-}
-
-Status PlasmaClient::Impl::Wait(int64_t num_object_requests,
-                                ObjectRequest* object_requests, int num_ready_objects,
-                                int64_t timeout_ms, int* num_objects_ready) {
-  ARROW_CHECK(manager_conn_ >= 0);
-  ARROW_CHECK(num_object_requests > 0);
-  ARROW_CHECK(num_ready_objects > 0);
-  ARROW_CHECK(num_ready_objects <= num_object_requests);
-
-  for (int i = 0; i < num_object_requests; ++i) {
-    ARROW_CHECK(object_requests[i].type == ObjectRequestType::PLASMA_QUERY_LOCAL ||
-                object_requests[i].type == ObjectRequestType::PLASMA_QUERY_ANYWHERE);
-  }
-
-  RETURN_NOT_OK(SendWaitRequest(manager_conn_, object_requests, num_object_requests,
-                                num_ready_objects, timeout_ms));
-  std::vector<uint8_t> buffer;
-  RETURN_NOT_OK(PlasmaReceive(manager_conn_, MessageType::PlasmaWaitReply, &buffer));
-  RETURN_NOT_OK(
-      ReadWaitReply(buffer.data(), buffer.size(), object_requests, &num_ready_objects));
-
-  *num_objects_ready = 0;
-  for (int i = 0; i < num_object_requests; ++i) {
-    ObjectRequestType type = object_requests[i].type;
-    auto status = static_cast<fb::ObjectStatus>(object_requests[i].location);
-    switch (type) {
-      case ObjectRequestType::PLASMA_QUERY_LOCAL:
-        if (status == fb::ObjectStatus::Local) {
-          *num_objects_ready += 1;
-        }
-        break;
-      case ObjectRequestType::PLASMA_QUERY_ANYWHERE:
-        if (status == fb::ObjectStatus::Local || status == fb::ObjectStatus::Remote) {
-          *num_objects_ready += 1;
-        } else {
-          ARROW_CHECK(status == fb::ObjectStatus::Nonexistent);
-        }
-        break;
-      default:
-        ARROW_LOG(FATAL) << "This code should be unreachable.";
-    }
-  }
   return Status::OK();
 }
 
@@ -1138,30 +957,12 @@ Status PlasmaClient::GetNotification(int fd, ObjectID* object_id, int64_t* data_
   return impl_->GetNotification(fd, object_id, data_size, metadata_size);
 }
 
-Status PlasmaClient::Disconnect() { return impl_->Disconnect(); }
-
-Status PlasmaClient::Fetch(int num_object_ids, const ObjectID* object_ids) {
-  return impl_->Fetch(num_object_ids, object_ids);
-}
-
-Status PlasmaClient::Wait(int64_t num_object_requests, ObjectRequest* object_requests,
-                          int num_ready_objects, int64_t timeout_ms,
-                          int* num_objects_ready) {
-  return impl_->Wait(num_object_requests, object_requests, num_ready_objects, timeout_ms,
-                     num_objects_ready);
+Status PlasmaClient::DecodeNotification(const uint8_t* buffer, ObjectID* object_id,
+                                        int64_t* data_size, int64_t* metadata_size) {
+  return impl_->DecodeNotification(buffer, object_id, data_size, metadata_size);
 }
 
-Status PlasmaClient::Transfer(const char* addr, int port, const ObjectID& object_id) {
-
return impl_->Transfer(addr, port, object_id); -} - -Status PlasmaClient::Info(const ObjectID& object_id, int* object_status) { - return impl_->Info(object_id, object_status); -} - -int PlasmaClient::get_manager_fd() const { return impl_->get_manager_fd(); } - -Status PlasmaClient::FlushReleaseHistory() { return impl_->FlushReleaseHistory(); } +Status PlasmaClient::Disconnect() { return impl_->Disconnect(); } bool PlasmaClient::IsInUse(const ObjectID& object_id) { return impl_->IsInUse(object_id); diff --git a/cpp/src/plasma/client.h b/cpp/src/plasma/client.h index 1ad09f5c06738..ac9e8eb0fe9c9 100644 --- a/cpp/src/plasma/client.h +++ b/cpp/src/plasma/client.h @@ -34,11 +34,6 @@ using arrow::Status; namespace plasma { -/// We keep a queue of unreleased objects cached in the client until we start -/// sending release requests to the store. This is to avoid frequently mapping -/// and unmapping objects and evicting data from processor caches. -constexpr int64_t kPlasmaDefaultReleaseDelay = 64; - /// Object buffer data structure. struct ObjectBuffer { /// The data buffer. @@ -54,21 +49,21 @@ class ARROW_EXPORT PlasmaClient { PlasmaClient(); ~PlasmaClient(); - /// Connect to the local plasma store and plasma manager. Return - /// the resulting connection. + /// Connect to the local plasma store. Return the resulting connection. /// /// \param store_socket_name The name of the UNIX domain socket to use to /// connect to the Plasma store. /// \param manager_socket_name The name of the UNIX domain socket to use to /// connect to the local Plasma manager. If this is "", then this /// function will not connect to a manager. - /// \param release_delay Number of released objects that are kept around - /// and not evicted to avoid too many munmaps. + /// Note that plasma manager is no longer supported, this function + /// will return failure if this is not "". + /// \param release_delay Deprecated (not used). /// \param num_retries number of attempts to connect to IPC socket, default 50 /// \return The return status. Status Connect(const std::string& store_socket_name, - const std::string& manager_socket_name, - int release_delay = kPlasmaDefaultReleaseDelay, int num_retries = -1); + const std::string& manager_socket_name = "", int release_delay = 0, + int num_retries = -1); /// Create an object in the Plasma Store. Any metadata for this object must be /// be passed in when the object is created. @@ -246,115 +241,21 @@ class ARROW_EXPORT PlasmaClient { Status GetNotification(int fd, ObjectID* object_id, int64_t* data_size, int64_t* metadata_size); + Status DecodeNotification(const uint8_t* buffer, ObjectID* object_id, + int64_t* data_size, int64_t* metadata_size); + /// Disconnect from the local plasma instance, including the local store and /// manager. /// /// \return The return status. Status Disconnect(); - /// Attempt to initiate the transfer of some objects from remote Plasma - /// Stores. - /// This method does not guarantee that the fetched objects will arrive - /// locally. - /// - /// For an object that is available in the local Plasma Store, this method - /// will - /// not do anything. For an object that is not available locally, it will - /// check - /// if the object are already being fetched. If so, it will not do anything. - /// If - /// not, it will query the object table for a list of Plasma Managers that - /// have - /// the object. The object table will return a non-empty list, and this Plasma - /// Manager will attempt to initiate transfers from one of those Plasma - /// Managers. 
- /// - /// This function is non-blocking. - /// - /// This method is idempotent in the sense that it is ok to call it multiple - /// times. - /// - /// \param num_object_ids The number of object IDs fetch is being called on. - /// \param object_ids The IDs of the objects that fetch is being called on. - /// \return The return status. - Status Fetch(int num_object_ids, const ObjectID* object_ids); - - /// Wait for (1) a specified number of objects to be available (sealed) in the - /// local Plasma Store or in a remote Plasma Store, or (2) for a timeout to - /// expire. This is a blocking call. - /// - /// \param num_object_requests Size of the object_requests array. - /// \param object_requests Object event array. Each element contains a request - /// for a particular object_id. The type of request is specified in the - /// "type" field. - /// - A PLASMA_QUERY_LOCAL request is satisfied when object_id becomes - /// available in the local Plasma Store. In this case, this function - /// sets the "status" field to ObjectStatus::Local. Note, if the - /// status - /// is not ObjectStatus::Local, it will be ObjectStatus::Nonexistent, - /// but it may exist elsewhere in the system. - /// - A PLASMA_QUERY_ANYWHERE request is satisfied when object_id - /// becomes - /// available either at the local Plasma Store or on a remote Plasma - /// Store. In this case, the functions sets the "status" field to - /// ObjectStatus::Local or ObjectStatus::Remote. - /// \param num_ready_objects The number of requests in object_requests array - /// that - /// must be satisfied before the function returns, unless it timeouts. - /// The num_ready_objects should be no larger than num_object_requests. - /// \param timeout_ms Timeout value in milliseconds. If this timeout expires - /// before min_num_ready_objects of requests are satisfied, the - /// function - /// returns. - /// \param num_objects_ready Out parameter for number of satisfied requests in - /// the object_requests list. If the returned number is less than - /// min_num_ready_objects this means that timeout expired. - /// \return The return status. - Status Wait(int64_t num_object_requests, ObjectRequest* object_requests, - int num_ready_objects, int64_t timeout_ms, int* num_objects_ready); - - /// Transfer local object to a different plasma manager. - /// - /// \param addr IP address of the plasma manager we are transfering to. - /// \param port Port of the plasma manager we are transfering to. - /// \param object_id ObjectID of the object we are transfering. - /// \return The return status. - Status Transfer(const char* addr, int port, const ObjectID& object_id); - - /// Return the status of a given object. This method may query the object - /// table. - /// - /// \param object_id The ID of the object whose status we query. - /// \param object_status Out parameter for object status. Can take the - /// following values. - /// - PLASMA_CLIENT_LOCAL, if object is stored in the local Plasma - /// Store. - /// has been already scheduled by the Plasma Manager. - /// - PLASMA_CLIENT_TRANSFER, if the object is either currently being - /// transferred or just scheduled. - /// - PLASMA_CLIENT_REMOTE, if the object is stored at a remote - /// Plasma Store. - /// - PLASMA_CLIENT_DOES_NOT_EXIST, if the object doesn’t exist in the - /// system. - /// \return The return status. - Status Info(const ObjectID& object_id, int* object_status); - - /// Get the file descriptor for the socket connection to the plasma manager. 
- /// - /// \return The file descriptor for the manager connection. If there is no - /// connection to the manager, this is -1. - int get_manager_fd() const; - private: friend class PlasmaBuffer; FRIEND_TEST(TestPlasmaStore, GetTest); FRIEND_TEST(TestPlasmaStore, LegacyGetTest); FRIEND_TEST(TestPlasmaStore, AbortTest); - /// This is a helper method that flushes all pending release calls to the - /// store. - Status FlushReleaseHistory(); - bool IsInUse(const ObjectID& object_id); class ARROW_NO_EXPORT Impl; diff --git a/cpp/src/plasma/common.cc b/cpp/src/plasma/common.cc index 0ca17cf814f8a..1b86fd80b4920 100644 --- a/cpp/src/plasma/common.cc +++ b/cpp/src/plasma/common.cc @@ -107,9 +107,6 @@ bool UniqueID::operator==(const UniqueID& rhs) const { return std::memcmp(data(), rhs.data(), kUniqueIDSize) == 0; } -ARROW_EXPORT fb::ObjectStatus ObjectStatusLocal = fb::ObjectStatus::Local; -ARROW_EXPORT fb::ObjectStatus ObjectStatusRemote = fb::ObjectStatus::Remote; - const PlasmaStoreInfo* plasma_config; } // namespace plasma diff --git a/cpp/src/plasma/common.h b/cpp/src/plasma/common.h index f7cdaf5ff51df..dfbd90c3aa553 100644 --- a/cpp/src/plasma/common.h +++ b/cpp/src/plasma/common.h @@ -33,8 +33,7 @@ #include "plasma/compat.h" #include "arrow/status.h" -#include "arrow/util/logging.h" -#ifdef PLASMA_GPU +#ifdef PLASMA_CUDA #include "arrow/gpu/cuda_api.h" #endif @@ -66,30 +65,6 @@ typedef UniqueID ObjectID; /// Size of object hash digests. constexpr int64_t kDigestSize = sizeof(uint64_t); -enum class ObjectRequestType : int { - /// Query for object in the local plasma store. - PLASMA_QUERY_LOCAL = 1, - /// Query for object in the local plasma store or in a remote plasma store. - PLASMA_QUERY_ANYWHERE -}; - -/// Object request data structure. Used for Wait. -struct ObjectRequest { - /// The ID of the requested object. If ID_NIL request any object. - ObjectID object_id; - /// Request associated to the object. It can take one of the following values: - /// - PLASMA_QUERY_LOCAL: return if or when the object is available in the - /// local Plasma Store. - /// - PLASMA_QUERY_ANYWHERE: return if or when the object is available in - /// the system (i.e., either in the local or a remote Plasma Store). - ObjectRequestType type; - /// Object location. This can be - /// - ObjectLocation::Local: object is ready at the local Plasma Store. - /// - ObjectLocation::Remote: object is ready at a remote Plasma Store. - /// - ObjectLocation::Nonexistent: object does not exist in the system. - ObjectLocation location; -}; - enum class ObjectState : int { /// Object was created but not sealed in the local Plasma Store. PLASMA_CREATED = 1, @@ -97,6 +72,12 @@ enum class ObjectState : int { PLASMA_SEALED }; +namespace internal { + +struct CudaIpcPlaceholder {}; + +} // namespace internal + /// This type is used by the Plasma store. It is here because it is exposed to /// the eviction policy. struct ObjectTableEntry { @@ -118,10 +99,6 @@ struct ObjectTableEntry { int64_t data_size; /// Size of the object metadata in bytes. int64_t metadata_size; -#ifdef PLASMA_GPU - /// IPC GPU handle to share with clients. - std::shared_ptr<::arrow::gpu::CudaIpcMemHandle> ipc_handle; -#endif /// Number of clients currently using this object. int ref_count; /// Unix epoch of when this object was created. @@ -133,6 +110,13 @@ struct ObjectTableEntry { ObjectState state; /// The digest of the object. Used to see if two objects are the same. 
 unsigned char digest[kDigestSize];
+
+#ifdef PLASMA_CUDA
+  /// IPC GPU handle to share with clients.
+  std::shared_ptr<::arrow::cuda::CudaIpcMemHandle> ipc_handle;
+#else
+  std::shared_ptr<internal::CudaIpcPlaceholder> ipc_handle;
+#endif
 };
 
 /// Mapping from ObjectIDs to information about the object.
diff --git a/cpp/src/plasma/eviction_policy.cc b/cpp/src/plasma/eviction_policy.cc
index e5beb5a579e28..da5df5a36ddd4 100644
--- a/cpp/src/plasma/eviction_policy.cc
+++ b/cpp/src/plasma/eviction_policy.cc
@@ -16,6 +16,7 @@
 // under the License.
 
 #include "plasma/eviction_policy.h"
+#include "plasma/plasma_allocator.h"
 
 #include
 
@@ -48,8 +49,7 @@ int64_t LRUCache::ChooseObjectsToEvict(int64_t num_bytes_required,
   return bytes_evicted;
 }
 
-EvictionPolicy::EvictionPolicy(PlasmaStoreInfo* store_info)
-    : memory_used_(0), store_info_(store_info) {}
+EvictionPolicy::EvictionPolicy(PlasmaStoreInfo* store_info) : store_info_(store_info) {}
 
 int64_t EvictionPolicy::ChooseObjectsToEvict(int64_t num_bytes_required,
                                              std::vector<ObjectID>* objects_to_evict) {
@@ -59,33 +59,29 @@ int64_t EvictionPolicy::ChooseObjectsToEvict(int64_t num_bytes_required,
   for (auto& object_id : *objects_to_evict) {
     cache_.Remove(object_id);
   }
-  // Update the number of bytes used.
-  memory_used_ -= bytes_evicted;
-  ARROW_CHECK(memory_used_ >= 0);
   return bytes_evicted;
 }
 
 void EvictionPolicy::ObjectCreated(const ObjectID& object_id) {
   auto entry = store_info_->objects[object_id].get();
   cache_.Add(object_id, entry->data_size + entry->metadata_size);
-  int64_t size = entry->data_size + entry->metadata_size;
-  memory_used_ += size;
-  ARROW_CHECK(memory_used_ <= store_info_->memory_capacity);
 }
 
 bool EvictionPolicy::RequireSpace(int64_t size, std::vector<ObjectID>* objects_to_evict) {
   // Check if there is enough space to create the object.
-  int64_t required_space = memory_used_ + size - store_info_->memory_capacity;
+  int64_t required_space =
+      PlasmaAllocator::Allocated() + size - PlasmaAllocator::GetFootprintLimit();
   // Try to free up at least as much space as we need right now but ideally
   // up to 20% of the total capacity.
-  int64_t space_to_free = std::max(required_space, store_info_->memory_capacity / 5);
+  int64_t space_to_free =
+      std::max(required_space, PlasmaAllocator::GetFootprintLimit() / 5);
   ARROW_LOG(DEBUG) << "not enough space to create this object, so evicting objects";
   // Choose some objects to evict, and update the return pointers.
   int64_t num_bytes_evicted = ChooseObjectsToEvict(space_to_free, objects_to_evict);
   ARROW_LOG(INFO) << "There is not enough space to create this object, so evicting "
                   << objects_to_evict->size() << " objects to free up "
                   << num_bytes_evicted << " bytes. The number of bytes in use (before "
-                  << "this eviction) is " << memory_used_ << ".";
+                  << "this eviction) is " << PlasmaAllocator::Allocated() << ".";
   return num_bytes_evicted >= required_space && num_bytes_evicted > 0;
 }
 
@@ -105,11 +101,6 @@ void EvictionPolicy::EndObjectAccess(const ObjectID& object_id,
 void EvictionPolicy::RemoveObject(const ObjectID& object_id) {
   // If the object is in the LRU cache, remove it.
   cache_.Remove(object_id);
-
-  auto entry = store_info_->objects[object_id].get();
-  int64_t size = entry->data_size + entry->metadata_size;
-  ARROW_CHECK(memory_used_ >= size);
-  memory_used_ -= size;
 }
 
 }  // namespace plasma
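The eviction target computed in RequireSpace() above is the larger of the actual shortfall and 20% of the footprint limit, so the store frees memory ahead of demand rather than evicting one object at a time. The arithmetic on its own (numbers are made up for the demo):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Evict at least the shortfall, but ideally 20% of the footprint limit.
int64_t SpaceToFree(int64_t allocated, int64_t request, int64_t limit) {
  int64_t required_space = allocated + request - limit;
  return std::max(required_space, limit / 5);
}

int main() {
  // 90 of 100 bytes used; a 30-byte object needs 20 freed, and 20% of the
  // limit is also 20, so both terms agree here.
  assert(SpaceToFree(90, 30, 100) == 20);
  // Small request: the 20%-of-limit term dominates (frees ahead of demand).
  assert(SpaceToFree(90, 5, 100) == 20);
  return 0;
}
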
   cache_.Remove(object_id);
-
-  auto entry = store_info_->objects[object_id].get();
-  int64_t size = entry->data_size + entry->metadata_size;
-  ARROW_CHECK(memory_used_ >= size);
-  memory_used_ -= size;
 }
 
 }  // namespace plasma
diff --git a/cpp/src/plasma/eviction_policy.h b/cpp/src/plasma/eviction_policy.h
index bbd3fc4320356..68342ae102f3e 100644
--- a/cpp/src/plasma/eviction_policy.h
+++ b/cpp/src/plasma/eviction_policy.h
@@ -126,8 +126,6 @@ class EvictionPolicy {
   void RemoveObject(const ObjectID& object_id);
 
  private:
-  /// The amount of memory (in bytes) currently being used.
-  int64_t memory_used_;
   /// Pointer to the plasma store info.
   PlasmaStoreInfo* store_info_;
   /// Datastructure for the LRU cache.
diff --git a/cpp/src/plasma/fling.cc b/cpp/src/plasma/fling.cc
index 26afd87066c2b..f0960aab6bf23 100644
--- a/cpp/src/plasma/fling.cc
+++ b/cpp/src/plasma/fling.cc
@@ -16,6 +16,8 @@
 #include <string.h>
+#include "arrow/util/logging.h"
+
 void init_msg(struct msghdr* msg, struct iovec* iov, char* buf, size_t buf_len) {
   iov->iov_base = buf;
   iov->iov_len = 1;
@@ -46,11 +48,32 @@ int send_fd(int conn, int fd) {
   memcpy(CMSG_DATA(header), reinterpret_cast<void*>(&fd), sizeof(int));
 
   // Send file descriptor.
-  ssize_t r = sendmsg(conn, &msg, 0);
-  if (r >= 0) {
-    return 0;
-  } else {
-    return static_cast<int>(r);
+  while (true) {
+    ssize_t r = sendmsg(conn, &msg, 0);
+    if (r < 0) {
+      if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) {
+        continue;
+      } else if (errno == EMSGSIZE) {
+        ARROW_LOG(WARNING) << "Failed to send file descriptor"
+                           << " (errno = EMSGSIZE), retrying.";
+        // If we failed to send the file descriptor, loop until we have sent it
+        // successfully. TODO(rkn): This is problematic for two reasons. First
+        // of all, sending the file descriptor should just succeed without any
+        // errors, but sometimes I see a "Message too long" error number.
+        // Second, looping like this allows a client to potentially block the
+        // plasma store event loop which should never happen.
+        continue;
+      } else {
+        ARROW_LOG(INFO) << "Error in send_fd (errno = " << errno << ")";
+        return static_cast<int>(r);
+      }
+    } else if (r == 0) {
+      ARROW_LOG(INFO) << "Encountered unexpected EOF";
+      return 0;
+    } else {
+      ARROW_CHECK(r > 0);
+      return static_cast<int>(r);
+    }
   }
 }
 
@@ -60,7 +83,19 @@ int recv_fd(int conn) {
   char buf[CMSG_SPACE(sizeof(int))];
   init_msg(&msg, &iov, buf, sizeof(buf));
 
-  if (recvmsg(conn, &msg, 0) == -1) return -1;
+  while (true) {
+    ssize_t r = recvmsg(conn, &msg, 0);
+    if (r == -1) {
+      if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) {
+        continue;
+      } else {
+        ARROW_LOG(INFO) << "Error in recv_fd (errno = " << errno << ")";
+        return -1;
+      }
+    } else {
+      break;
+    }
+  }
 
   int found_fd = -1;
   int oh_noes = 0;
diff --git a/cpp/src/plasma/format/plasma.fbs b/cpp/src/plasma/format/plasma.fbs
index ef934fbd81ed2..b3c890391887e 100644
--- a/cpp/src/plasma/format/plasma.fbs
+++ b/cpp/src/plasma/format/plasma.fbs
@@ -42,9 +42,6 @@ enum MessageType:long {
   // Delete an object.
   PlasmaDeleteRequest,
   PlasmaDeleteReply,
-  // Get status of an object.
-  PlasmaStatusRequest,
-  PlasmaStatusReply,
   // See if the store contains an object (will be deprecated).
   PlasmaContainsRequest,
   PlasmaContainsReply,
@@ -57,11 +54,6 @@ enum MessageType:long {
   // Make room for new objects in the plasma store.
   PlasmaEvictRequest,
   PlasmaEvictReply,
-  // Fetch objects from remote Plasma stores.
-  PlasmaFetchRequest,
-  // Wait for objects to be ready either from local or remote Plasma stores.
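Both new loops in fling.cc above follow one pattern: retry the syscall on transient errno values (EAGAIN, EWOULDBLOCK, EINTR) and surface everything else. A generic sketch of that pattern; the helper name is mine, not part of the patch:

```cpp
#include <cerrno>
#include <functional>

#include <sys/types.h>

// Retry an operation that returns a negative value and sets errno on
// failure, the way sendmsg()/recvmsg() do above.
ssize_t RetryTransient(const std::function<ssize_t()>& op) {
  while (true) {
    ssize_t r = op();
    if (r < 0 && (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)) {
      continue;  // transient: try again
    }
    return r;  // success, EOF, or a real error
  }
}
```

send_fd() additionally retries on EMSGSIZE, which the inherited TODO notes should never happen but is observed in practice.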
- PlasmaWaitRequest, - PlasmaWaitReply, // Subscribe to a list of objects or to all objects. PlasmaSubscribeRequest, // Unsubscribe. @@ -239,35 +231,6 @@ table PlasmaDeleteReply { errors: [PlasmaError]; } -table PlasmaStatusRequest { - // IDs of the objects stored at local Plasma store we request the status of. - object_ids: [string]; -} - -enum ObjectStatus:int { - // Object is stored in the local Plasma Store. - Local, - // Object is stored on a remote Plasma store, and it is not stored on the - // local Plasma Store. - Remote, - // Object is not stored in the system. - Nonexistent, - // Object is currently transferred from a remote Plasma store the local - // Plasma Store. - Transfer -} - -table PlasmaStatusReply { - // IDs of the objects being returned. - object_ids: [string]; - // Status of the object. - status: [ObjectStatus]; -} - -// PlasmaContains is a subset of PlasmaStatus which does not -// involve the plasma manager, only the store. We should consider -// unifying them in the future and deprecating PlasmaContains. - table PlasmaContainsRequest { // ID of the object we are querying. object_id: string; @@ -309,43 +272,6 @@ table PlasmaEvictReply { num_bytes: ulong; } -table PlasmaFetchRequest { - // IDs of objects to be gotten. - object_ids: [string]; -} - -table ObjectRequestSpec { - // ID of the object. - object_id: string; - // The type of the object. This specifies whether we - // will be waiting for an object store in the local or - // global Plasma store. - type: int; -} - -table PlasmaWaitRequest { - // Array of object requests whose status we are asking for. - object_requests: [ObjectRequestSpec]; - // Number of objects expected to be returned, if available. - num_ready_objects: int; - // timeout - timeout: long; -} - -table ObjectReply { - // ID of the object. - object_id: string; - // The object status. This specifies where the object is stored. - status: ObjectStatus; -} - -table PlasmaWaitReply { - // Array of object requests being returned. - object_requests: [ObjectReply]; - // Number of objects expected to be returned, if available. - num_ready_objects: int; -} - table PlasmaSubscribeRequest { } diff --git a/cpp/src/plasma/io.cc b/cpp/src/plasma/io.cc index d63ceb6da24da..cc425428ecee5 100644 --- a/cpp/src/plasma/io.cc +++ b/cpp/src/plasma/io.cc @@ -22,6 +22,7 @@ #include #include "arrow/status.h" +#include "arrow/util/logging.h" #include "plasma/common.h" #include "plasma/plasma_generated.h" @@ -49,7 +50,7 @@ Status WriteBytes(int fd, uint8_t* cursor, size_t length) { if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) { continue; } - return Status::IOError(std::string(strerror(errno))); + return Status::IOError(strerror(errno)); } else if (nbytes == 0) { return Status::IOError("Encountered unexpected EOF"); } @@ -80,7 +81,7 @@ Status ReadBytes(int fd, uint8_t* cursor, size_t length) { if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) { continue; } - return Status::IOError(std::string(strerror(errno))); + return Status::IOError(strerror(errno)); } else if (0 == nbytes) { return Status::IOError("Encountered unexpected EOF"); } @@ -171,12 +172,12 @@ Status ConnectIpcSocketRetry(const std::string& pathname, int num_retries, *fd = ConnectIpcSock(pathname); --num_retries; } + // If we could not connect to the socket, exit. 
if (*fd == -1) { - std::stringstream ss; - ss << "Could not connect to socket " << pathname; - return Status::IOError(ss.str()); + return Status::IOError("Could not connect to socket ", pathname); } + return Status::OK(); } diff --git a/cpp/src/plasma/lib/java/org_apache_arrow_plasma_PlasmaClientJNI.cc b/cpp/src/plasma/lib/java/org_apache_arrow_plasma_PlasmaClientJNI.cc index 7cd2f3574423c..1988742af9bc7 100644 --- a/cpp/src/plasma/lib/java/org_apache_arrow_plasma_PlasmaClientJNI.cc +++ b/cpp/src/plasma/lib/java/org_apache_arrow_plasma_PlasmaClientJNI.cc @@ -28,6 +28,8 @@ #include #include +#include "arrow/util/logging.h" + #include "plasma/client.h" constexpr jsize OBJECT_ID_SIZE = sizeof(plasma::ObjectID) / sizeof(jbyte); @@ -102,15 +104,15 @@ JNIEXPORT jobject JNICALL Java_org_apache_arrow_plasma_PlasmaClientJNI_create( std::shared_ptr data; Status s = client->Create(oid, size, md, md_size, &data); if (s.IsPlasmaObjectExists()) { - jclass Exception = env->FindClass("java/lang/Exception"); - env->ThrowNew(Exception, - "An object with this ID already exists in the plasma store."); + jclass exceptionClass = + env->FindClass("org/apache/arrow/plasma/exceptions/DuplicateObjectException"); + env->ThrowNew(exceptionClass, oid.hex().c_str()); return nullptr; } if (s.IsPlasmaStoreFull()) { - jclass Exception = env->FindClass("java/lang/Exception"); - env->ThrowNew(Exception, - "The plasma store ran out of memory and could not create this object."); + jclass exceptionClass = + env->FindClass("org/apache/arrow/plasma/exceptions/PlasmaOutOfMemoryException"); + env->ThrowNew(exceptionClass, ""); return nullptr; } ARROW_CHECK(s.ok()); @@ -220,79 +222,6 @@ JNIEXPORT jboolean JNICALL Java_org_apache_arrow_plasma_PlasmaClientJNI_contains return has_object; } -JNIEXPORT void JNICALL Java_org_apache_arrow_plasma_PlasmaClientJNI_fetch( - JNIEnv* env, jclass cls, jlong conn, jobjectArray object_ids) { - plasma::PlasmaClient* client = reinterpret_cast(conn); - jsize num_oids = env->GetArrayLength(object_ids); - - std::vector oids(num_oids); - for (int i = 0; i < num_oids; ++i) { - jbyteArray_to_object_id( - env, reinterpret_cast(env->GetObjectArrayElement(object_ids, i)), - &oids[i]); - } - - ARROW_CHECK_OK(client->Fetch(static_cast(num_oids), oids.data())); - - return; -} - -JNIEXPORT jobjectArray JNICALL Java_org_apache_arrow_plasma_PlasmaClientJNI_wait( - JNIEnv* env, jclass cls, jlong conn, jobjectArray object_ids, jint timeout_ms, - jint num_returns) { - plasma::PlasmaClient* client = reinterpret_cast(conn); - jsize num_oids = env->GetArrayLength(object_ids); - - if (num_returns < 0) { - jclass Exception = env->FindClass("java/lang/RuntimeException"); - env->ThrowNew(Exception, "The argument num_returns cannot be less than zero."); - return nullptr; - } - if (num_returns > num_oids) { - jclass Exception = env->FindClass("java/lang/RuntimeException"); - env->ThrowNew(Exception, - "The argument num_returns cannot be greater than len(object_ids)."); - return nullptr; - } - - std::vector oreqs(num_oids); - - for (int i = 0; i < num_oids; ++i) { - jbyteArray_to_object_id( - env, reinterpret_cast(env->GetObjectArrayElement(object_ids, i)), - &oreqs[i].object_id); - oreqs[i].type = plasma::ObjectRequestType::PLASMA_QUERY_ANYWHERE; - } - - int num_return_objects; - // TODO: may be blocked. 
consider to add the thread support - ARROW_CHECK_OK(client->Wait(static_cast(num_oids), oreqs.data(), num_returns, - static_cast(timeout_ms), &num_return_objects)); - - int num_to_return = std::min(num_return_objects, num_returns); - jclass clsByteArray = env->FindClass("[B"); - jobjectArray ret = env->NewObjectArray(num_to_return, clsByteArray, nullptr); - - int num_returned = 0; - jbyteArray oid = nullptr; - for (int i = 0; i < num_oids; ++i) { - if (num_returned >= num_to_return) { - break; - } - - if (oreqs[i].location == plasma::ObjectLocation::Local || - oreqs[i].location == plasma::ObjectLocation::Remote) { - oid = env->NewByteArray(OBJECT_ID_SIZE); - object_id_to_jbyteArray(env, oid, &oreqs[i].object_id); - env->SetObjectArrayElement(ret, num_returned, oid); - num_returned++; - } - } - ARROW_CHECK(num_returned == num_to_return); - - return ret; -} - JNIEXPORT jlong JNICALL Java_org_apache_arrow_plasma_PlasmaClientJNI_evict( JNIEnv* env, jclass cls, jlong conn, jlong num_bytes) { plasma::PlasmaClient* client = reinterpret_cast(conn); diff --git a/cpp/src/plasma/plasma.cc b/cpp/src/plasma/plasma.cc index 601a612be4071..e1c10369dc6ef 100644 --- a/cpp/src/plasma/plasma.cc +++ b/cpp/src/plasma/plasma.cc @@ -22,20 +22,18 @@ #include #include "plasma/common.h" +#include "plasma/common_generated.h" +#include "plasma/plasma_allocator.h" #include "plasma/protocol.h" namespace fb = plasma::flatbuf; namespace plasma { -extern "C" { -void dlfree(void* mem); -} - ObjectTableEntry::ObjectTableEntry() : pointer(nullptr), ref_count(0) {} ObjectTableEntry::~ObjectTableEntry() { - dlfree(pointer); + PlasmaAllocator::Free(pointer, data_size + metadata_size); pointer = nullptr; } diff --git a/cpp/src/plasma/plasma.h b/cpp/src/plasma/plasma.h index e63d967676053..e23969d05ff3b 100644 --- a/cpp/src/plasma/plasma.h +++ b/cpp/src/plasma/plasma.h @@ -38,14 +38,17 @@ #include "arrow/util/logging.h" #include "arrow/util/macros.h" #include "plasma/common.h" -#include "plasma/common_generated.h" -#ifdef PLASMA_GPU -using arrow::gpu::CudaIpcMemHandle; +#ifdef PLASMA_CUDA +using arrow::cuda::CudaIpcMemHandle; #endif namespace plasma { +namespace flatbuf { +struct ObjectInfoT; +} // namespace flatbuf + #define HANDLE_SIGPIPE(s, fd_) \ do { \ Status _s = (s); \ @@ -68,12 +71,9 @@ constexpr int64_t kBlockSize = 64; struct Client; -/// Mapping from object IDs to type and status of the request. -typedef std::unordered_map ObjectRequestMap; - // TODO(pcm): Replace this by the flatbuffers message PlasmaObjectSpec. struct PlasmaObject { -#ifdef PLASMA_GPU +#ifdef PLASMA_CUDA // IPC handle for Cuda. std::shared_ptr ipc_handle; #endif @@ -104,9 +104,6 @@ enum class ObjectStatus : int { struct PlasmaStoreInfo { /// Objects that are in the Plasma store. ObjectTable objects; - /// The amount of memory (in bytes) that we allow to be allocated in the - /// store. - int64_t memory_capacity; /// Boolean flag indicating whether to start the object store with hugepages /// support enabled. Huge pages are substantially larger than normal memory /// pages (e.g. 2MB or 1GB instead of 4KB) and using them can reduce diff --git a/cpp/src/plasma/plasma_allocator.cc b/cpp/src/plasma/plasma_allocator.cc new file mode 100644 index 0000000000000..b67eeea404bce --- /dev/null +++ b/cpp/src/plasma/plasma_allocator.cc @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <stddef.h>
+
+#include "plasma/malloc.h"
+#include "plasma/plasma_allocator.h"
+
+namespace plasma {
+
+extern "C" {
+void* dlmemalign(size_t alignment, size_t bytes);
+void dlfree(void* mem);
+}
+
+int64_t PlasmaAllocator::footprint_limit_ = 0;
+int64_t PlasmaAllocator::allocated_ = 0;
+
+void* PlasmaAllocator::Memalign(size_t alignment, size_t bytes) {
+  if (allocated_ + static_cast<int64_t>(bytes) > footprint_limit_) {
+    return nullptr;
+  }
+  void* mem = dlmemalign(alignment, bytes);
+  ARROW_CHECK(mem);
+  allocated_ += bytes;
+  return mem;
+}
+
+void PlasmaAllocator::Free(void* mem, size_t bytes) {
+  dlfree(mem);
+  allocated_ -= bytes;
+}
+
+void PlasmaAllocator::SetFootprintLimit(size_t bytes) {
+  footprint_limit_ = static_cast<int64_t>(bytes);
+}
+
+int64_t PlasmaAllocator::GetFootprintLimit() { return footprint_limit_; }
+
+int64_t PlasmaAllocator::Allocated() { return allocated_; }
+
+}  // namespace plasma
diff --git a/cpp/src/plasma/plasma_allocator.h b/cpp/src/plasma/plasma_allocator.h
new file mode 100644
index 0000000000000..d9d4cc0ecbe0c
--- /dev/null
+++ b/cpp/src/plasma/plasma_allocator.h
@@ -0,0 +1,64 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef PLASMA_ALLOCATOR_H
+#define PLASMA_ALLOCATOR_H
+
+#include <cstddef>
+#include <cstdint>
+
+namespace plasma {
+
+class PlasmaAllocator {
+ public:
+  /// Allocates size bytes and returns a pointer to the allocated memory. The
+  /// memory address will be a multiple of alignment, which must be a power of two.
+  ///
+  /// \param alignment Memory alignment.
+  /// \param bytes Number of bytes.
+  /// \return Pointer to allocated memory.
+  static void* Memalign(size_t alignment, size_t bytes);
+
+  /// Frees the memory space pointed to by mem, which must have been returned by
+  /// a previous call to Memalign().
+  ///
+  /// \param mem Pointer to memory to free.
+  /// \param bytes Number of bytes to be freed.
+  static void Free(void* mem, size_t bytes);
+
+  /// Sets the memory footprint limit for Plasma.
+  ///
+  /// \param bytes Plasma memory footprint limit in bytes.
+  static void SetFootprintLimit(size_t bytes);
+
+  /// Get the memory footprint limit for Plasma.
+  ///
+  /// \return Plasma memory footprint limit in bytes.
+  static int64_t GetFootprintLimit();
+
+  /// Get the number of bytes allocated by Plasma so far.
+  /// \return Number of bytes allocated by Plasma so far.
+  static int64_t Allocated();
+
+ private:
+  static int64_t allocated_;
+  static int64_t footprint_limit_;
+};
+
+}  // namespace plasma
+
+#endif  // PLASMA_ALLOCATOR_H
diff --git a/cpp/src/plasma/protocol.cc b/cpp/src/plasma/protocol.cc
index a74db66fded8f..a878647718264 100644
--- a/cpp/src/plasma/protocol.cc
+++ b/cpp/src/plasma/protocol.cc
@@ -25,7 +25,7 @@
 #include "plasma/common.h"
 #include "plasma/io.h"
 
-#ifdef ARROW_GPU
+#ifdef PLASMA_CUDA
 #include "arrow/gpu/cuda_api.h"
 #endif
 
@@ -42,10 +42,6 @@ using flatbuffers::uoffset_t;
 #define PLASMA_CHECK_ENUM(x, y) \
   static_assert(static_cast<int>(x) == static_cast<int>(y), "protocol mismatch")
 
-PLASMA_CHECK_ENUM(ObjectLocation::Local, fb::ObjectStatus::Local);
-PLASMA_CHECK_ENUM(ObjectLocation::Remote, fb::ObjectStatus::Remote);
-PLASMA_CHECK_ENUM(ObjectLocation::Nonexistent, fb::ObjectStatus::Nonexistent);
-
 flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>>
 ToFlatbuffer(flatbuffers::FlatBufferBuilder* fbb, const ObjectID* object_ids,
              int64_t num_objects) {
@@ -129,7 +125,7 @@ Status SendCreateReply(int sock, ObjectID object_id, PlasmaObject* object,
                                          object->metadata_offset, object->metadata_size,
                                          object->device_num);
   auto object_string = fbb.CreateString(object_id.binary());
-#ifdef PLASMA_GPU
+#ifdef PLASMA_CUDA
   flatbuffers::Offset<fb::CudaHandle> ipc_handle;
   if (object->device_num != 0) {
     std::shared_ptr<arrow::Buffer> handle;
@@ -145,7 +141,7 @@ Status SendCreateReply(int sock, ObjectID object_id, PlasmaObject* object,
   crb.add_store_fd(object->store_fd);
   crb.add_mmap_size(mmap_size);
   if (object->device_num != 0) {
-#ifdef PLASMA_GPU
+#ifdef PLASMA_CUDA
     crb.add_ipc_handle(ipc_handle);
 #else
     ARROW_LOG(FATAL) << "This should be unreachable.";
@@ -171,7 +167,7 @@ Status ReadCreateReply(uint8_t* data, size_t size, ObjectID* object_id,
   *mmap_size = message->mmap_size();
 
   object->device_num = message->plasma_object()->device_num();
-#ifdef PLASMA_GPU
+#ifdef PLASMA_CUDA
   if (object->device_num != 0) {
     RETURN_NOT_OK(CudaIpcMemHandle::FromBuffer(message->ipc_handle()->handle()->data(),
                                                &object->ipc_handle));
@@ -367,56 +363,6 @@ Status ReadDeleteReply(uint8_t* data, size_t size, std::vector<ObjectID>* object
   return Status::OK();
 }
 
-// Satus messages.
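Taken together, plasma_allocator.{h,cc} above give the store a single choke point for memory accounting. A minimal usage sketch with hypothetical sizes; in the store itself, a null return from Memalign() is what triggers the eviction path:

```cpp
#include "plasma/plasma_allocator.h"

int main() {
  // Cap the store at 1 GB, as "plasma_store_server -m 1000000000" does.
  plasma::PlasmaAllocator::SetFootprintLimit(1000 * 1000 * 1000);

  // 64-byte alignment matches the kBlockSize used for object payloads.
  void* mem = plasma::PlasmaAllocator::Memalign(64, 1024);
  if (mem == nullptr) {
    return 1;  // over the footprint limit: the store would evict and retry
  }
  // Free() needs the size passed back because dlfree() alone cannot update
  // the allocated_ counter.
  plasma::PlasmaAllocator::Free(mem, 1024);
  return plasma::PlasmaAllocator::Allocated() == 0 ? 0 : 1;
}
```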
- -Status SendStatusRequest(int sock, const ObjectID* object_ids, int64_t num_objects) { - flatbuffers::FlatBufferBuilder fbb; - auto message = - fb::CreatePlasmaStatusRequest(fbb, ToFlatbuffer(&fbb, object_ids, num_objects)); - return PlasmaSend(sock, MessageType::PlasmaStatusRequest, &fbb, message); -} - -Status ReadStatusRequest(uint8_t* data, size_t size, ObjectID object_ids[], - int64_t num_objects) { - DCHECK(data); - auto message = flatbuffers::GetRoot(data); - DCHECK(VerifyFlatbuffer(message, data, size)); - for (uoffset_t i = 0; i < num_objects; ++i) { - object_ids[i] = ObjectID::from_binary(message->object_ids()->Get(i)->str()); - } - return Status::OK(); -} - -Status SendStatusReply(int sock, ObjectID object_ids[], int object_status[], - int64_t num_objects) { - flatbuffers::FlatBufferBuilder fbb; - auto message = - fb::CreatePlasmaStatusReply(fbb, ToFlatbuffer(&fbb, object_ids, num_objects), - fbb.CreateVector(object_status, num_objects)); - return PlasmaSend(sock, MessageType::PlasmaStatusReply, &fbb, message); -} - -int64_t ReadStatusReply_num_objects(uint8_t* data, size_t size) { - DCHECK(data); - auto message = flatbuffers::GetRoot(data); - DCHECK(VerifyFlatbuffer(message, data, size)); - return message->object_ids()->size(); -} - -Status ReadStatusReply(uint8_t* data, size_t size, ObjectID object_ids[], - int object_status[], int64_t num_objects) { - DCHECK(data); - auto message = flatbuffers::GetRoot(data); - DCHECK(VerifyFlatbuffer(message, data, size)); - for (uoffset_t i = 0; i < num_objects; ++i) { - object_ids[i] = ObjectID::from_binary(message->object_ids()->Get(i)->str()); - } - for (uoffset_t i = 0; i < num_objects; ++i) { - object_status[i] = message->status()->data()[i]; - } - return Status::OK(); -} - // Contains messages. Status SendContainsRequest(int sock, ObjectID object_id) { @@ -588,7 +534,7 @@ Status SendGetReply(int sock, ObjectID object_ids[], objects.push_back(PlasmaObjectSpec(object.store_fd, object.data_offset, object.data_size, object.metadata_offset, object.metadata_size, object.device_num)); -#ifdef PLASMA_GPU +#ifdef PLASMA_CUDA if (object.device_num != 0) { std::shared_ptr handle; RETURN_NOT_OK(object.ipc_handle->Serialize(arrow::default_memory_pool(), &handle)); @@ -609,7 +555,7 @@ Status ReadGetReply(uint8_t* data, size_t size, ObjectID object_ids[], std::vector& store_fds, std::vector& mmap_sizes) { DCHECK(data); auto message = flatbuffers::GetRoot(data); -#ifdef PLASMA_GPU +#ifdef PLASMA_CUDA int handle_pos = 0; #endif DCHECK(VerifyFlatbuffer(message, data, size)); @@ -624,7 +570,7 @@ Status ReadGetReply(uint8_t* data, size_t size, ObjectID object_ids[], plasma_objects[i].metadata_offset = object->metadata_offset(); plasma_objects[i].metadata_size = object->metadata_size(); plasma_objects[i].device_num = object->device_num(); -#ifdef PLASMA_GPU +#ifdef PLASMA_CUDA if (object->device_num() != 0) { const void* ipc_handle = message->handles()->Get(handle_pos)->handle()->data(); RETURN_NOT_OK( @@ -640,95 +586,6 @@ Status ReadGetReply(uint8_t* data, size_t size, ObjectID object_ids[], } return Status::OK(); } -// Fetch messages. 
- -Status SendFetchRequest(int sock, const ObjectID* object_ids, int64_t num_objects) { - flatbuffers::FlatBufferBuilder fbb; - auto message = - fb::CreatePlasmaFetchRequest(fbb, ToFlatbuffer(&fbb, object_ids, num_objects)); - return PlasmaSend(sock, MessageType::PlasmaFetchRequest, &fbb, message); -} - -Status ReadFetchRequest(uint8_t* data, size_t size, std::vector& object_ids) { - DCHECK(data); - auto message = flatbuffers::GetRoot(data); - DCHECK(VerifyFlatbuffer(message, data, size)); - for (uoffset_t i = 0; i < message->object_ids()->size(); ++i) { - object_ids.push_back(ObjectID::from_binary(message->object_ids()->Get(i)->str())); - } - return Status::OK(); -} - -// Wait messages. - -Status SendWaitRequest(int sock, ObjectRequest object_requests[], int64_t num_requests, - int num_ready_objects, int64_t timeout_ms) { - flatbuffers::FlatBufferBuilder fbb; - - std::vector> object_request_specs; - for (int i = 0; i < num_requests; i++) { - object_request_specs.push_back(fb::CreateObjectRequestSpec( - fbb, fbb.CreateString(object_requests[i].object_id.binary()), - static_cast(object_requests[i].type))); - } - - auto message = fb::CreatePlasmaWaitRequest(fbb, fbb.CreateVector(object_request_specs), - num_ready_objects, timeout_ms); - return PlasmaSend(sock, MessageType::PlasmaWaitRequest, &fbb, message); -} - -Status ReadWaitRequest(uint8_t* data, size_t size, ObjectRequestMap& object_requests, - int64_t* timeout_ms, int* num_ready_objects) { - DCHECK(data); - auto message = flatbuffers::GetRoot(data); - DCHECK(VerifyFlatbuffer(message, data, size)); - *num_ready_objects = message->num_ready_objects(); - *timeout_ms = message->timeout(); - - for (uoffset_t i = 0; i < message->object_requests()->size(); i++) { - ObjectID object_id = - ObjectID::from_binary(message->object_requests()->Get(i)->object_id()->str()); - ObjectRequest object_request( - {object_id, - static_cast(message->object_requests()->Get(i)->type()), - ObjectLocation::Nonexistent}); - object_requests[object_id] = object_request; - } - return Status::OK(); -} - -Status SendWaitReply(int sock, const ObjectRequestMap& object_requests, - int num_ready_objects) { - flatbuffers::FlatBufferBuilder fbb; - - std::vector> object_replies; - for (const auto& entry : object_requests) { - const auto& object_request = entry.second; - object_replies.push_back( - fb::CreateObjectReply(fbb, fbb.CreateString(object_request.object_id.binary()), - static_cast(object_request.location))); - } - - auto message = fb::CreatePlasmaWaitReply( - fbb, fbb.CreateVector(object_replies.data(), num_ready_objects), num_ready_objects); - return PlasmaSend(sock, MessageType::PlasmaWaitReply, &fbb, message); -} - -Status ReadWaitReply(uint8_t* data, size_t size, ObjectRequest object_requests[], - int* num_ready_objects) { - DCHECK(data); - - auto message = flatbuffers::GetRoot(data); - DCHECK(VerifyFlatbuffer(message, data, size)); - *num_ready_objects = message->num_ready_objects(); - for (int i = 0; i < *num_ready_objects; i++) { - object_requests[i].object_id = - ObjectID::from_binary(message->object_requests()->Get(i)->object_id()->str()); - object_requests[i].location = - static_cast(message->object_requests()->Get(i)->status()); - } - return Status::OK(); -} // Subscribe messages. 
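Every message pair that survives in protocol.cc follows the same shape as the removed senders above: build a flatbuffer, ship it with PlasmaSend(), then verify and unpack with GetRoot() on the other side. Sketched here against the retained evict message; the num_bytes field and the generated fb::CreatePlasmaEvictRequest helper are assumptions based on the surrounding code:

```cpp
// Send side: serialize the request and write it to the socket.
Status SendEvictRequestSketch(int sock, int64_t num_bytes) {
  flatbuffers::FlatBufferBuilder fbb;
  auto message = fb::CreatePlasmaEvictRequest(fbb, num_bytes);
  return PlasmaSend(sock, MessageType::PlasmaEvictRequest, &fbb, message);
}

// Read side: verify the buffer, then pull the fields out.
Status ReadEvictRequestSketch(uint8_t* data, size_t size, int64_t* num_bytes) {
  DCHECK(data);
  auto message = flatbuffers::GetRoot<fb::PlasmaEvictRequest>(data);
  DCHECK(VerifyFlatbuffer(message, data, size));
  *num_bytes = message->num_bytes();
  return Status::OK();
}
```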
diff --git a/cpp/src/plasma/protocol.h b/cpp/src/plasma/protocol.h index c8204584b8adb..0362bd47797d4 100644 --- a/cpp/src/plasma/protocol.h +++ b/cpp/src/plasma/protocol.h @@ -128,21 +128,6 @@ Status SendDeleteReply(int sock, const std::vector& object_ids, Status ReadDeleteReply(uint8_t* data, size_t size, std::vector* object_ids, std::vector* errors); -/* Satus messages. */ - -Status SendStatusRequest(int sock, const ObjectID* object_ids, int64_t num_objects); - -Status ReadStatusRequest(uint8_t* data, size_t size, ObjectID object_ids[], - int64_t num_objects); - -Status SendStatusReply(int sock, ObjectID object_ids[], int object_status[], - int64_t num_objects); - -int64_t ReadStatusReply_num_objects(uint8_t* data, size_t size); - -Status ReadStatusReply(uint8_t* data, size_t size, ObjectID object_ids[], - int object_status[], int64_t num_objects); - /* Plasma Constains message functions. */ Status SendContainsRequest(int sock, ObjectID object_id); @@ -184,26 +169,6 @@ Status SendEvictReply(int sock, int64_t num_bytes); Status ReadEvictReply(uint8_t* data, size_t size, int64_t& num_bytes); -/* Plasma Fetch Remote message functions. */ - -Status SendFetchRequest(int sock, const ObjectID* object_ids, int64_t num_objects); - -Status ReadFetchRequest(uint8_t* data, size_t size, std::vector& object_ids); - -/* Plasma Wait message functions. */ - -Status SendWaitRequest(int sock, ObjectRequest object_requests[], int64_t num_requests, - int num_ready_objects, int64_t timeout_ms); - -Status ReadWaitRequest(uint8_t* data, size_t size, ObjectRequestMap& object_requests, - int64_t* timeout_ms, int* num_ready_objects); - -Status SendWaitReply(int sock, const ObjectRequestMap& object_requests, - int num_ready_objects); - -Status ReadWaitReply(uint8_t* data, size_t size, ObjectRequest object_requests[], - int* num_ready_objects); - /* Plasma Subscribe message functions. */ Status SendSubscribeRequest(int sock); diff --git a/cpp/src/plasma/store.cc b/cpp/src/plasma/store.cc index 28624d0bc16bf..745e336049e8b 100644 --- a/cpp/src/plasma/store.cc +++ b/cpp/src/plasma/store.cc @@ -52,18 +52,22 @@ #include #include +#include "arrow/status.h" + #include "plasma/common.h" #include "plasma/common_generated.h" #include "plasma/fling.h" #include "plasma/io.h" #include "plasma/malloc.h" +#include "plasma/plasma_allocator.h" +#include "plasma/protocol.h" -#ifdef PLASMA_GPU +#ifdef PLASMA_CUDA #include "arrow/gpu/cuda_api.h" -using arrow::gpu::CudaBuffer; -using arrow::gpu::CudaContext; -using arrow::gpu::CudaDeviceManager; +using arrow::cuda::CudaBuffer; +using arrow::cuda::CudaContext; +using arrow::cuda::CudaDeviceManager; #endif using arrow::util::ArrowLog; @@ -73,13 +77,6 @@ namespace fb = plasma::flatbuf; namespace plasma { -extern "C" { -void* dlmalloc(size_t bytes); -void* dlmemalign(size_t alignment, size_t bytes); -void dlfree(void* mem); -size_t dlmalloc_set_footprint_limit(size_t bytes); -} - struct GetRequest { GetRequest(Client* client, const std::vector& object_ids); /// The client that called get. 
@@ -111,13 +108,11 @@ GetRequest::GetRequest(Client* client, const std::vector& object_ids) Client::Client(int fd) : fd(fd), notification_fd(-1) {} -PlasmaStore::PlasmaStore(EventLoop* loop, int64_t system_memory, std::string directory, - bool hugepages_enabled) +PlasmaStore::PlasmaStore(EventLoop* loop, std::string directory, bool hugepages_enabled) : loop_(loop), eviction_policy_(&store_info_) { - store_info_.memory_capacity = system_memory; store_info_.directory = directory; store_info_.hugepages_enabled = hugepages_enabled; -#ifdef PLASMA_GPU +#ifdef PLASMA_CUDA DCHECK_OK(CudaDeviceManager::GetInstance(&manager_)); #endif } @@ -162,7 +157,7 @@ PlasmaError PlasmaStore::CreateObject(const ObjectID& object_id, int64_t data_si } // Try to evict objects until there is enough space. uint8_t* pointer = nullptr; -#ifdef PLASMA_GPU +#ifdef PLASMA_CUDA std::shared_ptr gpu_handle; std::shared_ptr context_; if (device_num != 0) { @@ -170,7 +165,7 @@ PlasmaError PlasmaStore::CreateObject(const ObjectID& object_id, int64_t data_si } #endif while (true) { - // Allocate space for the new object. We use dlmemalign instead of dlmalloc + // Allocate space for the new object. We use memalign instead of malloc // in order to align the allocated region to a 64-byte boundary. This is not // strictly necessary, but it is an optimization that could speed up the // computation of a hash of the data (see compute_object_hash_parallel in @@ -178,8 +173,8 @@ PlasmaError PlasmaStore::CreateObject(const ObjectID& object_id, int64_t data_si // it is not guaranteed that the corresponding pointer in the client will be // 64-byte aligned, but in practice it often will be. if (device_num == 0) { - pointer = - reinterpret_cast(dlmemalign(kBlockSize, data_size + metadata_size)); + pointer = reinterpret_cast( + PlasmaAllocator::Memalign(kBlockSize, data_size + metadata_size)); if (pointer == nullptr) { // Tell the eviction policy how much space we need to create this object. std::vector objects_to_evict; @@ -195,7 +190,7 @@ PlasmaError PlasmaStore::CreateObject(const ObjectID& object_id, int64_t data_si break; } } else { -#ifdef PLASMA_GPU +#ifdef PLASMA_CUDA DCHECK_OK(context_->Allocate(data_size + metadata_size, &gpu_handle)); break; #endif @@ -220,7 +215,7 @@ PlasmaError PlasmaStore::CreateObject(const ObjectID& object_id, int64_t data_si entry->device_num = device_num; entry->create_time = std::time(nullptr); entry->construct_duration = -1; -#ifdef PLASMA_GPU +#ifdef PLASMA_CUDA if (device_num != 0) { DCHECK_OK(gpu_handle->ExportForIpc(&entry->ipc_handle)); result->ipc_handle = entry->ipc_handle; @@ -246,7 +241,7 @@ void PlasmaObject_init(PlasmaObject* object, ObjectTableEntry* entry) { DCHECK(object != nullptr); DCHECK(entry != nullptr); DCHECK(entry->state == ObjectState::PLASMA_SEALED); -#ifdef PLASMA_GPU +#ifdef PLASMA_CUDA if (entry->device_num != 0) { object->ipc_handle = entry->ipc_handle; } @@ -327,21 +322,11 @@ void PlasmaStore::ReturnFromGet(GetRequest* get_req) { if (s.ok()) { // Send all of the file descriptors for the present objects. for (int store_fd : store_fds) { - int error_code = send_fd(get_req->client->fd, store_fd); - // If we failed to send the file descriptor, loop until we have sent it - // successfully. TODO(rkn): This is problematic for two reasons. First - // of all, sending the file descriptor should just succeed without any - // errors, but sometimes I see a "Message too long" error number. 
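The CreateObject() hunk above routes allocation through PlasmaAllocator but keeps the evict-and-retry loop. Its shape, reduced to a self-contained sketch (the callbacks stand in for PlasmaAllocator::Memalign and for RequireSpace() plus the actual eviction; this is an illustration, not the store's code):

```cpp
#include <cstdint>
#include <functional>

// Try to allocate under the footprint limit; on failure, ask the eviction
// policy for space and retry. A null result maps to an out-of-memory error.
void* AllocateWithEviction(int64_t bytes,
                           const std::function<void*(int64_t)>& allocate,
                           const std::function<bool(int64_t)>& evict) {
  while (true) {
    void* pointer = allocate(bytes);
    if (pointer != nullptr) {
      return pointer;  // fits under the footprint limit
    }
    if (!evict(bytes)) {
      return nullptr;  // eviction cannot free enough space
    }
  }
}
```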
- // Second, looping like this allows a client to potentially block the - // plasma store event loop which should never happen. - while (error_code < 0) { - if (errno == EMSGSIZE) { - ARROW_LOG(WARNING) << "Failed to send file descriptor, retrying."; - error_code = send_fd(get_req->client->fd, store_fd); - continue; - } - WarnIfSigpipe(error_code, get_req->client->fd); - break; + // Only send the file descriptor if it hasn't been sent (see analogous + // logic in GetStoreFd in client.cc). + if (get_req->client->used_fds.find(store_fd) == get_req->client->used_fds.end()) { + WarnIfSigpipe(send_fd(get_req->client->fd, store_fd), get_req->client->fd); + get_req->client->used_fds.insert(store_fd); } } } @@ -777,9 +762,7 @@ Status PlasmaStore::ProcessMessage(Client* client) { uint8_t* input = input_buffer_.data(); size_t input_size = input_buffer_.size(); ObjectID object_id; - PlasmaObject object; - // TODO(pcm): Get rid of the following. - memset(&object, 0, sizeof(object)); + PlasmaObject object = {}; // Process the different types of requests. switch (type) { @@ -798,8 +781,12 @@ Status PlasmaStore::ProcessMessage(Client* client) { HANDLE_SIGPIPE( SendCreateReply(client->fd, object_id, &object, error_code, mmap_size), client->fd); - if (error_code == PlasmaError::OK && device_num == 0) { + // Only send the file descriptor if it hasn't been sent (see analogous + // logic in GetStoreFd in client.cc). Similar in ReturnFromGet. + if (error_code == PlasmaError::OK && device_num == 0 && + client->used_fds.find(object.store_fd) == client->used_fds.end()) { WarnIfSigpipe(send_fd(client->fd, object.store_fd), client->fd); + client->used_fds.insert(object.store_fd); } } break; case fb::MessageType::PlasmaCreateAndSealRequest: { @@ -889,7 +876,7 @@ Status PlasmaStore::ProcessMessage(Client* client) { SubscribeToUpdates(client); break; case fb::MessageType::PlasmaConnectRequest: { - HANDLE_SIGPIPE(SendConnectReply(client->fd, store_info_.memory_capacity), + HANDLE_SIGPIPE(SendConnectReply(client->fd, PlasmaAllocator::GetFootprintLimit()), client->fd); } break; case fb::MessageType::PlasmaDisconnectClient: @@ -907,22 +894,23 @@ class PlasmaStoreRunner { public: PlasmaStoreRunner() {} - void Start(char* socket_name, int64_t system_memory, std::string directory, - bool hugepages_enabled, bool use_one_memory_mapped_file) { + void Start(char* socket_name, std::string directory, bool hugepages_enabled) { // Create the event loop. loop_.reset(new EventLoop); - store_.reset( - new PlasmaStore(loop_.get(), system_memory, directory, hugepages_enabled)); + store_.reset(new PlasmaStore(loop_.get(), directory, hugepages_enabled)); plasma_config = store_->GetPlasmaStoreInfo(); - // If the store is configured to use a single memory-mapped file, then we - // achieve that by mallocing and freeing a single large amount of space. - // that maximum allowed size up front. - if (use_one_memory_mapped_file) { - void* pointer = plasma::dlmemalign(kBlockSize, system_memory); - ARROW_CHECK(pointer != nullptr); - plasma::dlfree(pointer); - } + // We are using a single memory-mapped file by mallocing and freeing a single + // large amount of space up front. According to the documentation, + // dlmalloc might need up to 128*sizeof(size_t) bytes for internal + // bookkeeping. 
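The used_fds bookkeeping above (in ReturnFromGet() and the create path) means a store fd crosses the socket at most once per client; after that the client reuses the mapping it already created. A stand-alone sketch, with a minimal Client stand-in and the WarnIfSigpipe error handling elided:

```cpp
#include <unordered_set>

// Minimal stand-in for the Client fields used here.
struct ClientSketch {
  int fd;
  std::unordered_set<int> used_fds;
};

int send_fd(int conn, int fd);  // the fling.cc helper shown earlier

// Push a store fd to a client at most once; afterwards the client keeps
// using the mmap it already made for that fd.
void SendStoreFdOnce(ClientSketch* client, int store_fd) {
  if (client->used_fds.count(store_fd) == 0) {
    send_fd(client->fd, store_fd);
    client->used_fds.insert(store_fd);
  }
}
```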
+ void* pointer = plasma::PlasmaAllocator::Memalign( + kBlockSize, PlasmaAllocator::GetFootprintLimit() - 256 * sizeof(size_t)); + ARROW_CHECK(pointer != nullptr); + // This will unmap the file, but the next one created will be as large + // as this one (this is an implementation detail of dlmalloc). + plasma::PlasmaAllocator::Free( + pointer, PlasmaAllocator::GetFootprintLimit() - 256 * sizeof(size_t)); int socket = BindIpcSock(socket_name, true); // TODO(pcm): Check return value. @@ -957,16 +945,15 @@ void HandleSignal(int signal) { } } -void StartServer(char* socket_name, int64_t system_memory, std::string plasma_directory, - bool hugepages_enabled, bool use_one_memory_mapped_file) { +void StartServer(char* socket_name, std::string plasma_directory, + bool hugepages_enabled) { // Ignore SIGPIPE signals. If we don't do this, then when we attempt to write // to a client that has already died, the store could die. signal(SIGPIPE, SIG_IGN); g_runner.reset(new PlasmaStoreRunner()); signal(SIGTERM, HandleSignal); - g_runner->Start(socket_name, system_memory, plasma_directory, hugepages_enabled, - use_one_memory_mapped_file); + g_runner->Start(socket_name, plasma_directory, hugepages_enabled); } } // namespace plasma @@ -978,11 +965,9 @@ int main(int argc, char* argv[]) { // Directory where plasma memory mapped files are stored. std::string plasma_directory; bool hugepages_enabled = false; - // True if a single large memory-mapped file should be created at startup. - bool use_one_memory_mapped_file = false; int64_t system_memory = -1; int c; - while ((c = getopt(argc, argv, "s:m:d:hf")) != -1) { + while ((c = getopt(argc, argv, "s:m:d:h")) != -1) { switch (c) { case 'd': plasma_directory = std::string(optarg); @@ -997,14 +982,13 @@ int main(int argc, char* argv[]) { char extra; int scanned = sscanf(optarg, "%" SCNd64 "%c", &system_memory, &extra); ARROW_CHECK(scanned == 1); + // Set system memory capacity + plasma::PlasmaAllocator::SetFootprintLimit(static_cast(system_memory)); ARROW_LOG(INFO) << "Allowing the Plasma store to use up to " << static_cast(system_memory) / 1000000000 << "GB of memory."; break; } - case 'f': - use_one_memory_mapped_file = true; - break; default: exit(-1); } @@ -1054,12 +1038,8 @@ int main(int argc, char* argv[]) { SetMallocGranularity(1024 * 1024 * 1024); // 1 GB } #endif - // Make it so dlmalloc fails if we try to request more memory than is - // available. - plasma::dlmalloc_set_footprint_limit((size_t)system_memory); ARROW_LOG(DEBUG) << "starting server listening on " << socket_name; - plasma::StartServer(socket_name, system_memory, plasma_directory, hugepages_enabled, - use_one_memory_mapped_file); + plasma::StartServer(socket_name, plasma_directory, hugepages_enabled); plasma::g_runner->Shutdown(); plasma::g_runner = nullptr; diff --git a/cpp/src/plasma/store.h b/cpp/src/plasma/store.h index 44fdf603f7f44..a5c586b7f53f0 100644 --- a/cpp/src/plasma/store.h +++ b/cpp/src/plasma/store.h @@ -29,10 +29,18 @@ #include "plasma/events.h" #include "plasma/eviction_policy.h" #include "plasma/plasma.h" -#include "plasma/protocol.h" + +namespace arrow { +class Status; +} // namespace arrow namespace plasma { +namespace flatbuf { +struct ObjectInfoT; +enum class PlasmaError; +} // namespace flatbuf + using flatbuf::ObjectInfoT; using flatbuf::PlasmaError; @@ -54,6 +62,9 @@ struct Client { /// Object ids that are used by this client. std::unordered_set object_ids; + /// File descriptors that are used by this client. 
+ std::unordered_set used_fds; + /// The file descriptor used to push notifications to client. This is only valid /// if client subscribes to plasma store. -1 indicates invalid. int notification_fd; @@ -64,8 +75,7 @@ class PlasmaStore { using NotificationMap = std::unordered_map; // TODO: PascalCase PlasmaStore methods. - PlasmaStore(EventLoop* loop, int64_t system_memory, std::string directory, - bool hugetlbfs_enabled); + PlasmaStore(EventLoop* loop, std::string directory, bool hugetlbfs_enabled); ~PlasmaStore(); @@ -173,7 +183,7 @@ class PlasmaStore { NotificationMap::iterator SendNotifications(NotificationMap::iterator it); - Status ProcessMessage(Client* client); + arrow::Status ProcessMessage(Client* client); private: void PushNotification(ObjectInfoT* object_notification); @@ -223,8 +233,8 @@ class PlasmaStore { std::unordered_map> connected_clients_; std::unordered_set deletion_cache_; -#ifdef PLASMA_GPU - arrow::gpu::CudaDeviceManager* manager_; +#ifdef PLASMA_CUDA + arrow::cuda::CudaDeviceManager* manager_; #endif }; diff --git a/cpp/src/plasma/test/client_tests.cc b/cpp/src/plasma/test/client_tests.cc index 1ad60396af9ac..1678e27f90f58 100644 --- a/cpp/src/plasma/test/client_tests.cc +++ b/cpp/src/plasma/test/client_tests.cc @@ -60,7 +60,7 @@ class TestPlasmaStore : public ::testing::Test { std::string plasma_directory = test_executable.substr(0, test_executable.find_last_of("/")); std::string plasma_command = plasma_directory + - "/plasma_store_server -m 1000000000 -s " + + "/plasma_store_server -m 10000000 -s " + store_socket_name_ + " 1> /dev/null 2> /dev/null &"; system(plasma_command.c_str()); ARROW_CHECK_OK(client_.Connect(store_socket_name_, "")); @@ -82,7 +82,7 @@ class TestPlasmaStore : public ::testing::Test { void CreateObject(PlasmaClient& client, const ObjectID& object_id, const std::vector& metadata, - const std::vector& data) { + const std::vector& data, bool release = true) { std::shared_ptr data_buffer; ARROW_CHECK_OK(client.Create(object_id, data.size(), &metadata[0], metadata.size(), &data_buffer)); @@ -90,7 +90,9 @@ class TestPlasmaStore : public ::testing::Test { data_buffer->mutable_data()[i] = data[i]; } ARROW_CHECK_OK(client.Seal(object_id)); - ARROW_CHECK_OK(client.Release(object_id)); + if (release) { + ARROW_CHECK_OK(client.Release(object_id)); + } } const std::string& GetStoreSocketName() const { return store_socket_name_; } @@ -155,11 +157,12 @@ TEST_F(TestPlasmaStore, SealErrorsTest) { // Create object. std::vector data(100, 0); - CreateObject(client_, object_id, {42}, data); + CreateObject(client_, object_id, {42}, data, false); // Trying to seal it again. result = client_.Seal(object_id); ASSERT_TRUE(result.IsPlasmaObjectAlreadySealed()); + ARROW_CHECK_OK(client_.Release(object_id)); } TEST_F(TestPlasmaStore, DeleteTest) { @@ -184,7 +187,6 @@ TEST_F(TestPlasmaStore, DeleteTest) { ARROW_CHECK_OK(client_.Contains(object_id, &has_object)); ASSERT_TRUE(has_object); - // Avoid race condition of Plasma Manager waiting for notification. ARROW_CHECK_OK(client_.Release(object_id)); // object_id is marked as to-be-deleted, when it is not in use, it will be deleted. ARROW_CHECK_OK(client_.Contains(object_id, &has_object)); @@ -228,13 +230,7 @@ TEST_F(TestPlasmaStore, DeleteObjectsTest) { // client2_ won't send the release request immediately because the trigger // condition is not reached. The release is only added to release cache. object_buffers.clear(); - // The reference count went to zero, but the objects are still in the release - // cache. 
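The reworked CreateObject test helper above wraps the canonical put sequence. Sketched as a free function, assuming a connected client as in TestPlasmaStore (the null metadata pointer is an assumption for brevity):

```cpp
#include <cstring>
#include <memory>
#include <vector>

#include "arrow/buffer.h"
#include "arrow/status.h"
#include "plasma/client.h"

// Create, fill, seal, release: the sequence the tests drive. Release() only
// queues the unref in the client-side release cache; the store sees it later.
arrow::Status PutObject(plasma::PlasmaClient* client,
                        const plasma::ObjectID& object_id,
                        const std::vector<uint8_t>& data) {
  std::shared_ptr<arrow::Buffer> data_buffer;
  RETURN_NOT_OK(client->Create(object_id, data.size(),
                               /*metadata=*/nullptr, /*metadata_size=*/0,
                               &data_buffer));
  std::memcpy(data_buffer->mutable_data(), data.data(), data.size());
  RETURN_NOT_OK(client->Seal(object_id));  // now visible to Get()
  return client->Release(object_id);
}
```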
- ARROW_CHECK_OK(client_.Contains(object_id1, &has_object)); - ASSERT_TRUE(has_object); - ARROW_CHECK_OK(client_.Contains(object_id2, &has_object)); - ASSERT_TRUE(has_object); - // The Delete call will flush release cache and send the Delete request. + // Delete the objects. result = client2_.Delete(std::vector{object_id1, object_id2}); ARROW_CHECK_OK(client_.Contains(object_id1, &has_object)); ASSERT_FALSE(has_object); @@ -254,7 +250,6 @@ TEST_F(TestPlasmaStore, ContainsTest) { // First create object. std::vector data(100, 0); CreateObject(client_, object_id, {42}, data); - // Avoid race condition of Plasma Manager waiting for notification. std::vector object_buffers; ARROW_CHECK_OK(client_.Get({object_id}, -1, &object_buffers)); ARROW_CHECK_OK(client_.Contains(object_id, &has_object)); @@ -277,7 +272,6 @@ TEST_F(TestPlasmaStore, GetTest) { // First create object. std::vector data = {3, 5, 6, 7, 9}; CreateObject(client_, object_id, {42}, data); - ARROW_CHECK_OK(client_.FlushReleaseHistory()); EXPECT_FALSE(client_.IsInUse(object_id)); object_buffers.clear(); @@ -291,11 +285,9 @@ TEST_F(TestPlasmaStore, GetTest) { auto metadata = object_buffers[0].metadata; object_buffers.clear(); ::arrow::AssertBufferEqual(*metadata, std::string{42}); - ARROW_CHECK_OK(client_.FlushReleaseHistory()); EXPECT_TRUE(client_.IsInUse(object_id)); } // Object is automatically released - ARROW_CHECK_OK(client_.FlushReleaseHistory()); EXPECT_FALSE(client_.IsInUse(object_id)); } @@ -314,17 +306,14 @@ TEST_F(TestPlasmaStore, LegacyGetTest) { // First create object. std::vector data = {3, 5, 6, 7, 9}; CreateObject(client_, object_id, {42}, data); - ARROW_CHECK_OK(client_.FlushReleaseHistory()); EXPECT_FALSE(client_.IsInUse(object_id)); ARROW_CHECK_OK(client_.Get(&object_id, 1, -1, &object_buffer)); AssertObjectBufferEqual(object_buffer, {42}, {3, 5, 6, 7, 9}); } // Object needs releasing manually - ARROW_CHECK_OK(client_.FlushReleaseHistory()); EXPECT_TRUE(client_.IsInUse(object_id)); ARROW_CHECK_OK(client_.Release(object_id)); - ARROW_CHECK_OK(client_.FlushReleaseHistory()); EXPECT_FALSE(client_.IsInUse(object_id)); } @@ -377,11 +366,9 @@ TEST_F(TestPlasmaStore, AbortTest) { ASSERT_TRUE(status.IsInvalid()); // Release, then abort. ARROW_CHECK_OK(client_.Release(object_id)); - ARROW_CHECK_OK(client_.FlushReleaseHistory()); EXPECT_TRUE(client_.IsInUse(object_id)); ARROW_CHECK_OK(client_.Abort(object_id)); - ARROW_CHECK_OK(client_.FlushReleaseHistory()); EXPECT_FALSE(client_.IsInUse(object_id)); // Test for object non-existence after the abort. @@ -394,7 +381,6 @@ TEST_F(TestPlasmaStore, AbortTest) { // Test that we can get the object. 
ARROW_CHECK_OK(client_.Get({object_id}, -1, &object_buffers)); AssertObjectBufferEqual(object_buffers[0], {42, 43}, {1, 2, 3, 4, 5}); - ARROW_CHECK_OK(client_.Release(object_id)); } TEST_F(TestPlasmaStore, MultipleClientTest) { @@ -487,10 +473,10 @@ TEST_F(TestPlasmaStore, ManyObjectTest) { } } -#ifdef PLASMA_GPU -using arrow::gpu::CudaBuffer; -using arrow::gpu::CudaBufferReader; -using arrow::gpu::CudaBufferWriter; +#ifdef PLASMA_CUDA +using arrow::cuda::CudaBuffer; +using arrow::cuda::CudaBufferReader; +using arrow::cuda::CudaBufferWriter; namespace { @@ -590,7 +576,7 @@ TEST_F(TestPlasmaStore, MultipleClientGPUTest) { AssertCudaRead(object_buffers[0].metadata, {5}); } -#endif // PLASMA_GPU +#endif // PLASMA_CUDA } // namespace plasma diff --git a/cpp/src/plasma/test/serialization_tests.cc b/cpp/src/plasma/test/serialization_tests.cc index 085ae97db980f..4fb3f9a5ed376 100644 --- a/cpp/src/plasma/test/serialization_tests.cc +++ b/cpp/src/plasma/test/serialization_tests.cc @@ -64,8 +64,7 @@ std::vector read_message_from_file(int fd, MessageType message_type) { PlasmaObject random_plasma_object(void) { unsigned int seed = static_cast(time(NULL)); int random = rand_r(&seed); - PlasmaObject object; - memset(&object, 0, sizeof(object)); + PlasmaObject object = {}; object.store_fd = random + 7; object.data_offset = random + 1; object.metadata_offset = random + 2; @@ -106,8 +105,7 @@ TEST(PlasmaSerialization, CreateReply) { ARROW_CHECK_OK(SendCreateReply(fd, object_id1, &object1, PlasmaError::OK, mmap_size1)); std::vector data = read_message_from_file(fd, MessageType::PlasmaCreateReply); ObjectID object_id2; - PlasmaObject object2; - memset(&object2, 0, sizeof(object2)); + PlasmaObject object2 = {}; int store_fd; int64_t mmap_size2; ARROW_CHECK_OK(ReadCreateReply(data.data(), data.size(), &object_id2, &object2, @@ -254,44 +252,6 @@ TEST(PlasmaSerialization, DeleteReply) { close(fd); } -TEST(PlasmaSerialization, StatusRequest) { - int fd = create_temp_file(); - constexpr int64_t num_objects = 2; - ObjectID object_ids[num_objects]; - object_ids[0] = random_object_id(); - object_ids[1] = random_object_id(); - ARROW_CHECK_OK(SendStatusRequest(fd, object_ids, num_objects)); - std::vector data = - read_message_from_file(fd, MessageType::PlasmaStatusRequest); - ObjectID object_ids_read[num_objects]; - ARROW_CHECK_OK( - ReadStatusRequest(data.data(), data.size(), object_ids_read, num_objects)); - ASSERT_EQ(object_ids[0], object_ids_read[0]); - ASSERT_EQ(object_ids[1], object_ids_read[1]); - close(fd); -} - -TEST(PlasmaSerialization, StatusReply) { - int fd = create_temp_file(); - ObjectID object_ids[2]; - object_ids[0] = random_object_id(); - object_ids[1] = random_object_id(); - int object_statuses[2] = {42, 43}; - ARROW_CHECK_OK(SendStatusReply(fd, object_ids, object_statuses, 2)); - std::vector data = read_message_from_file(fd, MessageType::PlasmaStatusReply); - int64_t num_objects = ReadStatusReply_num_objects(data.data(), data.size()); - - std::vector object_ids_read(num_objects); - std::vector object_statuses_read(num_objects); - ARROW_CHECK_OK(ReadStatusReply(data.data(), data.size(), object_ids_read.data(), - object_statuses_read.data(), num_objects)); - ASSERT_EQ(object_ids[0], object_ids_read[0]); - ASSERT_EQ(object_ids[1], object_ids_read[1]); - ASSERT_EQ(object_statuses[0], object_statuses_read[0]); - ASSERT_EQ(object_statuses[1], object_statuses_read[1]); - close(fd); -} - TEST(PlasmaSerialization, EvictRequest) { int fd = create_temp_file(); int64_t num_bytes = 111; @@ -314,84 +274,6 @@ 
TEST(PlasmaSerialization, EvictReply) { close(fd); } -TEST(PlasmaSerialization, FetchRequest) { - int fd = create_temp_file(); - ObjectID object_ids[2]; - object_ids[0] = random_object_id(); - object_ids[1] = random_object_id(); - ARROW_CHECK_OK(SendFetchRequest(fd, object_ids, 2)); - std::vector data = read_message_from_file(fd, MessageType::PlasmaFetchRequest); - std::vector object_ids_read; - ARROW_CHECK_OK(ReadFetchRequest(data.data(), data.size(), object_ids_read)); - ASSERT_EQ(object_ids[0], object_ids_read[0]); - ASSERT_EQ(object_ids[1], object_ids_read[1]); - close(fd); -} - -TEST(PlasmaSerialization, WaitRequest) { - int fd = create_temp_file(); - const int num_objects_in = 2; - ObjectRequest object_requests_in[num_objects_in] = { - ObjectRequest({random_object_id(), ObjectRequestType::PLASMA_QUERY_ANYWHERE, - ObjectLocation::Local}), - ObjectRequest({random_object_id(), ObjectRequestType::PLASMA_QUERY_LOCAL, - ObjectLocation::Local})}; - const int num_ready_objects_in = 1; - int64_t timeout_ms = 1000; - - ARROW_CHECK_OK(SendWaitRequest(fd, &object_requests_in[0], num_objects_in, - num_ready_objects_in, timeout_ms)); - /* Read message back. */ - std::vector data = read_message_from_file(fd, MessageType::PlasmaWaitRequest); - int num_ready_objects_out; - int64_t timeout_ms_read; - ObjectRequestMap object_requests_out; - ARROW_CHECK_OK(ReadWaitRequest(data.data(), data.size(), object_requests_out, - &timeout_ms_read, &num_ready_objects_out)); - ASSERT_EQ(num_objects_in, object_requests_out.size()); - ASSERT_EQ(num_ready_objects_out, num_ready_objects_in); - for (int i = 0; i < num_objects_in; i++) { - const ObjectID& object_id = object_requests_in[i].object_id; - ASSERT_EQ(1, object_requests_out.count(object_id)); - const auto& entry = object_requests_out.find(object_id); - ASSERT_TRUE(entry != object_requests_out.end()); - ASSERT_EQ(entry->second.object_id, object_requests_in[i].object_id); - ASSERT_EQ(entry->second.type, object_requests_in[i].type); - } - close(fd); -} - -TEST(PlasmaSerialization, WaitReply) { - int fd = create_temp_file(); - const int num_objects_in = 2; - /* Create a map with two ObjectRequests in it. */ - ObjectRequestMap objects_in(num_objects_in); - ObjectID id1 = random_object_id(); - objects_in[id1] = - ObjectRequest({id1, ObjectRequestType::PLASMA_QUERY_LOCAL, ObjectLocation::Local}); - ObjectID id2 = random_object_id(); - objects_in[id2] = ObjectRequest( - {id2, ObjectRequestType::PLASMA_QUERY_LOCAL, ObjectLocation::Nonexistent}); - - ARROW_CHECK_OK(SendWaitReply(fd, objects_in, num_objects_in)); - /* Read message back. */ - std::vector data = read_message_from_file(fd, MessageType::PlasmaWaitReply); - ObjectRequest objects_out[2]; - int num_objects_out; - ARROW_CHECK_OK( - ReadWaitReply(data.data(), data.size(), &objects_out[0], &num_objects_out)); - ASSERT_EQ(num_objects_in, num_objects_out); - for (int i = 0; i < num_objects_out; i++) { - /* Each object request must appear exactly once. 
*/ - ASSERT_EQ(objects_in.count(objects_out[i].object_id), 1); - const auto& entry = objects_in.find(objects_out[i].object_id); - ASSERT_TRUE(entry != objects_in.end()); - ASSERT_EQ(entry->second.object_id, objects_out[i].object_id); - ASSERT_EQ(entry->second.location, objects_out[i].location); - } - close(fd); -} - TEST(PlasmaSerialization, DataRequest) { int fd = create_temp_file(); ObjectID object_id1 = random_object_id(); diff --git a/cpp/submodules/parquet-testing b/cpp/submodules/parquet-testing index 46ae2605c2de3..bb7b6abbb3fbe 160000 --- a/cpp/submodules/parquet-testing +++ b/cpp/submodules/parquet-testing @@ -1 +1 @@ -Subproject commit 46ae2605c2de306f5740587107dcf333a527f2d1 +Subproject commit bb7b6abbb3fbeff845646364a4286142127be04c diff --git a/cpp/thirdparty/README.md b/cpp/thirdparty/README.md index bd1cb28d81818..9be3361e5d54f 100644 --- a/cpp/thirdparty/README.md +++ b/cpp/thirdparty/README.md @@ -29,17 +29,24 @@ offline builds. To set up your own specific build toolchain, here are the relevant environment variables +* brotli: `BROTLI_HOME`, can be disabled with `-DARROW_WITH_BROTLI=off` * Boost: `BOOST_ROOT` +* double-conversion: `DOUBLE_CONVERSION_HOME` * Googletest: `GTEST_HOME` (only required to build the unit tests) * gflags: `GFLAGS_HOME` (only required to build the unit tests) +* glog: `GLOG_HOME` (only required if `ARROW_USE_GLOG=ON`) * Google Benchmark: `GBENCHMARK_HOME` (only required if building benchmarks) * Flatbuffers: `FLATBUFFERS_HOME` (only required for -DARROW_IPC=on, which is the default) * Hadoop: `HADOOP_HOME` (only required for the HDFS I/O extensions) * jemalloc: `JEMALLOC_HOME` -* brotli: `BROTLI_HOME`, can be disabled with `-DARROW_WITH_BROTLI=off` * lz4: `LZ4_HOME`, can be disabled with `-DARROW_WITH_LZ4=off` +* Apache ORC: `ORC_HOME` +* protobuf: `PROTOBUF_HOME` +* rapidjson: `RAPIDJSON_HOME` +* re2: `RE2_HOME` (only required to build Gandiva currently) * snappy: `SNAPPY_HOME`, can be disabled with `-DARROW_WITH_SNAPPY=off` +* thrift: `THRIFT_HOME` * zlib: `ZLIB_HOME`, can be disabled with `-DARROW_WITH_ZLIB=off` * zstd: `ZSTD_HOME`, can be disabled with `-DARROW_WITH_ZSTD=off` @@ -69,24 +76,26 @@ script: ```shell # Download tarballs into `$HOME/arrow-thirdparty-deps` -$ ./thirdparty/download_dependencies $HOME/arrow-thirdparty-deps -# some output omitted - +$ ./thirdparty/download_dependencies $HOME/arrow-thirdparty # Environment variables for offline Arrow build -export ARROW_BOOST_URL=$HOME/arrow-thirdparty-deps/boost.tar.gz -export ARROW_GTEST_URL=$HOME/arrow-thirdparty-deps/gtest.tar.gz -export ARROW_GFLAGS_URL=$HOME/arrow-thirdparty-deps/gflags.tar.gz -export ARROW_GBENCHMARK_URL=$HOME/arrow-thirdparty-deps/gbenchmark.tar.gz -export ARROW_FLATBUFFERS_URL=$HOME/arrow-thirdparty-deps/flatbuffers.tar.gz -export ARROW_RAPIDJSON_URL=$HOME/arrow-thirdparty-deps/rapidjson.tar.gz -export ARROW_SNAPPY_URL=$HOME/arrow-thirdparty-deps/snappy.tar.gz -export ARROW_BROTLI_URL=$HOME/arrow-thirdparty-deps/brotli.tar.gz -export ARROW_LZ4_URL=$HOME/arrow-thirdparty-deps/lz4.tar.gz -export ARROW_ZLIB_URL=$HOME/arrow-thirdparty-deps/zlib.tar.gz -export ARROW_ZSTD_URL=$HOME/arrow-thirdparty-deps/zstd.tar.gz -export ARROW_PROTOBUF_URL=$HOME/arrow-thirdparty-deps/protobuf.tar.gz -export ARROW_GRPC_URL=$HOME/arrow-thirdparty-deps/grpc.tar.gz -export ARROW_ORC_URL=$HOME/arrow-thirdparty-deps/orc.tar.gz +export ARROW_BOOST_URL=$HOME/arrow-thirdparty/boost-1.67.0.tar.gz +export ARROW_BROTLI_URL=$HOME/arrow-thirdparty/brotli-v0.6.0.tar.gz +export 
ARROW_DOUBLE_CONVERSION_URL=$HOME/arrow-thirdparty/double-conversion-v3.1.1.tar.gz +export ARROW_FLATBUFFERS_URL=$HOME/arrow-thirdparty/flatbuffers-02a7807dd8d26f5668ffbbec0360dc107bbfabd5.tar.gz +export ARROW_GBENCHMARK_URL=$HOME/arrow-thirdparty/gbenchmark-v1.4.1.tar.gz +export ARROW_GFLAGS_URL=$HOME/arrow-thirdparty/gflags-v2.2.0.tar.gz +export ARROW_GLOG_URL=$HOME/arrow-thirdparty/glog-v0.3.5.tar.gz +export ARROW_GRPC_URL=$HOME/arrow-thirdparty/grpc-v1.14.1.tar.gz +export ARROW_GTEST_URL=$HOME/arrow-thirdparty/gtest-1.8.0.tar.gz +export ARROW_LZ4_URL=$HOME/arrow-thirdparty/lz4-v1.7.5.tar.gz +export ARROW_ORC_URL=$HOME/arrow-thirdparty/orc-1.5.4.tar.gz +export ARROW_PROTOBUF_URL=$HOME/arrow-thirdparty/protobuf-v3.6.1.tar.gz +export ARROW_RAPIDJSON_URL=$HOME/arrow-thirdparty/rapidjson-v1.1.0.tar.gz +export ARROW_RE2_URL=$HOME/arrow-thirdparty/re2-2018-10-01.tar.gz +export ARROW_SNAPPY_URL=$HOME/arrow-thirdparty/snappy-1.1.3.tar.gz +export ARROW_THRIFT_URL=$HOME/arrow-thirdparty/thrift-0.11.0.tar.gz +export ARROW_ZLIB_URL=$HOME/arrow-thirdparty/zlib-1.2.8.tar.gz +export ARROW_ZSTD_URL=$HOME/arrow-thirdparty/zstd-v1.3.7.tar.gz ``` This can be automated by using inline source/eval: diff --git a/cpp/thirdparty/download_dependencies.sh b/cpp/thirdparty/download_dependencies.sh index ea63a8a41fb4e..f782963dd1450 100755 --- a/cpp/thirdparty/download_dependencies.sh +++ b/cpp/thirdparty/download_dependencies.sh @@ -30,7 +30,7 @@ else DESTDIR=$1 fi -DESTDIR=$(realpath "${DESTDIR}") +DESTDIR=$(readlink -f "${DESTDIR}") download_dependency() { local url=$1 @@ -38,7 +38,7 @@ download_dependency() { # --show-progress will not output to stdout, it is safe to pipe the result of # the script into eval. - wget --quiet --show-progress --continue --output-document="${out}" "${url}" + wget --quiet --continue --output-document="${out}" "${url}" } main() { diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 705f56c0e6130..e62a37b082407 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -25,16 +25,17 @@ BOOST_VERSION=1.67.0 BROTLI_VERSION=v0.6.0 +CARES_VERSION=1.15.0 DOUBLE_CONVERSION_VERSION=v3.1.1 -FLATBUFFERS_VERSION=02a7807dd8d26f5668ffbbec0360dc107bbfabd5 +FLATBUFFERS_VERSION=v1.10.0 GBENCHMARK_VERSION=v1.4.1 GFLAGS_VERSION=v2.2.0 GLOG_VERSION=v0.3.5 -GRPC_VERSION=v1.14.1 +GRPC_VERSION=v1.18.0 GTEST_VERSION=1.8.0 JEMALLOC_VERSION=17c897976c60b0e6e4f4a365c751027244dada7a LZ4_VERSION=v1.7.5 -ORC_VERSION=1.5.1 +ORC_VERSION=1.5.4 PROTOBUF_VERSION=v3.6.1 RAPIDJSON_VERSION=v1.1.0 RE2_VERSION=2018-10-01 @@ -50,6 +51,7 @@ ZSTD_VERSION=v1.3.7 DEPENDENCIES=( "ARROW_BOOST_URL boost-${BOOST_VERSION}.tar.gz https://dl.bintray.com/boostorg/release/${BOOST_VERSION}/source/boost_${BOOST_VERSION//./_}.tar.gz" "ARROW_BROTLI_URL brotli-${BROTLI_VERSION}.tar.gz https://github.com/google/brotli/archive/${BROTLI_VERSION}.tar.gz" + "ARROW_CARES_URL cares-${CARES_VERSION}.tar.gz https://c-ares.haxx.se/download/c-ares-$CARES_VERSION.tar.gz" "ARROW_DOUBLE_CONVERSION_URL double-conversion-${DOUBLE_CONVERSION_VERSION}.tar.gz https://github.com/google/double-conversion/archive/${DOUBLE_CONVERSION_VERSION}.tar.gz" "ARROW_FLATBUFFERS_URL flatbuffers-${FLATBUFFERS_VERSION}.tar.gz https://github.com/google/flatbuffers/archive/${FLATBUFFERS_VERSION}.tar.gz" "ARROW_GBENCHMARK_URL gbenchmark-${GBENCHMARK_VERSION}.tar.gz https://github.com/google/benchmark/archive/${GBENCHMARK_VERSION}.tar.gz" @@ -61,6 +63,7 @@ DEPENDENCIES=( "ARROW_ORC_URL orc-${ORC_VERSION}.tar.gz 
https://github.com/apache/orc/archive/rel/release-${ORC_VERSION}.tar.gz" "ARROW_PROTOBUF_URL protobuf-${PROTOBUF_VERSION}.tar.gz https://github.com/google/protobuf/releases/download/${PROTOBUF_VERSION}/protobuf-all-${PROTOBUF_VERSION:1}.tar.gz" "ARROW_RAPIDJSON_URL rapidjson-${RAPIDJSON_VERSION}.tar.gz https://github.com/miloyip/rapidjson/archive/${RAPIDJSON_VERSION}.tar.gz" + "ARROW_RE2_URL re2-${RE2_VERSION}.tar.gz https://github.com/google/re2/archive/${RE2_VERSION}.tar.gz" "ARROW_SNAPPY_URL snappy-${SNAPPY_VERSION}.tar.gz https://github.com/google/snappy/releases/download/${SNAPPY_VERSION}/snappy-${SNAPPY_VERSION}.tar.gz" "ARROW_THRIFT_URL thrift-${THRIFT_VERSION}.tar.gz http://archive.apache.org/dist/thrift/${THRIFT_VERSION}/thrift-${THRIFT_VERSION}.tar.gz" "ARROW_ZLIB_URL zlib-${ZLIB_VERSION}.tar.gz http://zlib.net/fossils/zlib-${ZLIB_VERSION}.tar.gz" diff --git a/cpp/tools/parquet/CMakeLists.txt b/cpp/tools/parquet/CMakeLists.txt index 47aea28ff6828..bbbec29c13009 100644 --- a/cpp/tools/parquet/CMakeLists.txt +++ b/cpp/tools/parquet/CMakeLists.txt @@ -26,7 +26,9 @@ if (PARQUET_BUILD_EXECUTABLES) target_link_libraries(${TOOL} parquet_static) # Avoid unsetting RPATH when installing set_target_properties(${TOOL} PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE) - install(TARGETS ${TOOL} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) + install(TARGETS ${TOOL} + ${INSTALL_IS_OPTIONAL} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) endforeach(TOOL) add_dependencies(parquet ${PARQUET_TOOLS}) diff --git a/cpp/tools/parquet/parquet-reader.cc b/cpp/tools/parquet/parquet-reader.cc index 34bdfc103dcc0..a5b7db1330a97 100644 --- a/cpp/tools/parquet/parquet-reader.cc +++ b/cpp/tools/parquet/parquet-reader.cc @@ -23,7 +23,7 @@ int main(int argc, char** argv) { if (argc > 5 || argc < 2) { - std::cerr << "Usage: parquet_reader [--only-metadata] [--no-memory-map] [--json]" + std::cerr << "Usage: parquet-reader [--only-metadata] [--no-memory-map] [--json]" "[--print-key-value-metadata] [--columns=...] " << std::endl; return -1; diff --git a/cpp/valgrind.supp b/cpp/valgrind.supp index 8e707e39e7cd8..8d2d5da904bab 100644 --- a/cpp/valgrind.supp +++ b/cpp/valgrind.supp @@ -21,4 +21,33 @@ Memcheck:Cond fun:*CastFunctor*BooleanType* } - +{ + :Conditional jump or move depends on uninitialised value(s) + Memcheck:Cond + ... + fun:*llvm*PassManager* +} +{ + :Conditional jump or move depends on uninitialised value(s) + Memcheck:Cond + ... + fun:*re2*RE2* +} +{ + :Use of uninitialised value of size 8 + Memcheck:Value8 + ... + fun:*re2*RE2* +} +{ + :Conditional jump or move depends on uninitialised value(s) + Memcheck:Cond + ... + fun:*re2*Prog* +} +{ + :Use of uninitialised value of size 8 + Memcheck:Value8 + ... 
+ fun:*re2*Prog* +} diff --git a/csharp/build/Common.props b/csharp/build/Common.props index 9e7901d8a109f..cebd07cf46157 100644 --- a/csharp/build/Common.props +++ b/csharp/build/Common.props @@ -2,7 +2,4 @@ ../../artifacts/$(AssemblyName) - - - \ No newline at end of file diff --git a/csharp/src/Apache.Arrow/Arrays/Array.cs b/csharp/src/Apache.Arrow/Arrays/Array.cs index a9609f20f1210..e795ad9843717 100644 --- a/csharp/src/Apache.Arrow/Arrays/Array.cs +++ b/csharp/src/Apache.Arrow/Arrays/Array.cs @@ -33,8 +33,6 @@ protected Array(ArrayData data) public int NullCount => Data.NullCount; - public Bitmap NullBitmap => Data.NullBitmap; - public ArrowBuffer NullBitmapBuffer => Data.Buffers[0]; public virtual void Accept(IArrowArrayVisitor visitor) @@ -43,7 +41,7 @@ public virtual void Accept(IArrowArrayVisitor visitor) } public bool IsValid(int index) => - NullBitmapBuffer == null || NullBitmap.IsSet(index); + NullBitmapBuffer.IsEmpty || BitUtility.GetBit(NullBitmapBuffer.Span, index); public bool IsNull(int index) => !IsValid(index); @@ -51,13 +49,14 @@ public bool IsValid(int index) => internal static void Accept(T array, IArrowArrayVisitor visitor) where T : class, IArrowArray { - if (visitor is IArrowArrayVisitor v) - { - v.Visit(array); - } - else + switch (visitor) { - visitor.Visit(array); + case IArrowArrayVisitor typedVisitor: + typedVisitor.Visit(array); + break; + default: + visitor.Visit(array); + break; } } } diff --git a/csharp/src/Apache.Arrow/Arrays/ArrayData.cs b/csharp/src/Apache.Arrow/Arrays/ArrayData.cs index a8d745c66c150..2074f125dc616 100644 --- a/csharp/src/Apache.Arrow/Arrays/ArrayData.cs +++ b/csharp/src/Apache.Arrow/Arrays/ArrayData.cs @@ -19,7 +19,7 @@ namespace Apache.Arrow { - public class ArrayData + public sealed class ArrayData { public readonly IArrowType DataType; public readonly int Length; @@ -28,9 +28,6 @@ public class ArrayData public readonly ArrowBuffer[] Buffers; public readonly ArrayData[] Children; - public ArrowBuffer NullBitmapBuffer => Buffers[0]; - public Bitmap NullBitmap => NullBitmapBuffer; - public ArrayData( IArrowType dataType, int length, int nullCount = 0, int offset = 0, diff --git a/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs b/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs index cbe64dfc1bba2..12ef5ee7a7291 100644 --- a/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs @@ -50,28 +50,31 @@ public BinaryArray(IArrowType dataType, int length, public ArrowBuffer ValueBuffer => Data.Buffers[2]; + public ReadOnlySpan ValueOffsets => ValueOffsetsBuffer.Span.CastTo().Slice(0, Length + 1); + + public ReadOnlySpan Values => ValueBuffer.Span.CastTo(); + [MethodImpl(MethodImplOptions.AggressiveInlining)] public int GetValueOffset(int index) { - var offsets = ValueOffsetsBuffer.GetSpan(); - return offsets[Offset + index]; + return ValueOffsets[Offset + index]; } [MethodImpl(MethodImplOptions.AggressiveInlining)] public int GetValueLength(int index) { - var offsets = ValueOffsetsBuffer.GetSpan(); + var offsets = ValueOffsets; var offset = Offset + index; + return offsets[offset + 1] - offsets[offset]; } - public ReadOnlySpan GetValue(int index) + public ReadOnlySpan GetBytes(int index) { var offset = GetValueOffset(index); var length = GetValueLength(index); - var values = ValueBuffer.GetSpan(); - - return values.Slice(offset, length); + + return ValueBuffer.Span.Slice(offset, length); } } diff --git a/csharp/src/Apache.Arrow/Arrays/BooleanArray.cs b/csharp/src/Apache.Arrow/Arrays/BooleanArray.cs 
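Note: the generic type parameters in these C# hunks appear to have been stripped during extraction (`IArrowArrayVisitor` for `IArrowArrayVisitor<T>`, `Span` for `Span<byte>`, and so on). Reconstructed under that assumption, the new double-dispatch helper added to `Array.cs` above presumably reads:

```csharp
// Reconstruction (generics re-inserted): route to the typed visitor
// when one is implemented, otherwise fall back to the untyped Visit.
internal static void Accept<T>(T array, IArrowArrayVisitor visitor)
    where T : class, IArrowArray
{
    switch (visitor)
    {
        case IArrowArrayVisitor<T> typedVisitor:
            typedVisitor.Visit(array);
            break;
        default:
            visitor.Visit(array);
            break;    
    }
}
```

The same stripping affects `ArrowBuffer.Builder<T>`, the `ReadOnlySpan<int>` offset properties, and the `IArrowTypeVisitor<T>` cases in the hunks that follow.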
index f4197122e37c0..ddee188d98dc7 100644 --- a/csharp/src/Apache.Arrow/Arrays/BooleanArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/BooleanArray.cs @@ -39,8 +39,7 @@ public BooleanArray(ArrayData data) if (IsNull(index)) return null; - var span = GetSpan(); - return BitUtility.GetBit(span, index); + return BitUtility.GetBit(Values, index); } } } diff --git a/csharp/src/Apache.Arrow/Arrays/ListArray.cs b/csharp/src/Apache.Arrow/Arrays/ListArray.cs index e3872bceb0abb..3540f5afbc579 100644 --- a/csharp/src/Apache.Arrow/Arrays/ListArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/ListArray.cs @@ -13,6 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +using System; using Apache.Arrow.Types; namespace Apache.Arrow @@ -23,6 +24,8 @@ public class ListArray : Array public ArrowBuffer ValueOffsetsBuffer => Data.Buffers[1]; + public ReadOnlySpan ValueOffsets => ValueOffsetsBuffer.Span.CastTo().Slice(0, Length + 1); + public ListArray(IArrowType dataType, int length, ArrowBuffer valueOffsetsBuffer, IArrowArray values, ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0) @@ -43,14 +46,13 @@ public ListArray(ArrayData data) public int GetValueOffset(int index) { - var span = ValueOffsetsBuffer.GetSpan(Offset); - return span[index]; + return ValueOffsets[index]; } public int GetValueLength(int index) { - var span = ValueOffsetsBuffer.GetSpan(Offset); - return span[index + 1] - span[index]; + var offsets = ValueOffsets; + return offsets[index + 1] - offsets[index]; } } } diff --git a/csharp/src/Apache.Arrow/Arrays/PrimitiveArray.cs b/csharp/src/Apache.Arrow/Arrays/PrimitiveArray.cs index 6dcb10333d2fd..617bddc50dd43 100644 --- a/csharp/src/Apache.Arrow/Arrays/PrimitiveArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/PrimitiveArray.cs @@ -16,8 +16,6 @@ using System; using System.Collections.Generic; using System.Runtime.CompilerServices; -using Apache.Arrow.Memory; -using Apache.Arrow.Types; namespace Apache.Arrow { @@ -33,18 +31,17 @@ protected PrimitiveArray(ArrayData data) public ArrowBuffer ValueBuffer => Data.Buffers[1]; - public Span GetSpan() => ValueBuffer.GetSpan().Slice(0, Length); + public ReadOnlySpan Values => ValueBuffer.Span.CastTo().Slice(0, Length); [MethodImpl(MethodImplOptions.AggressiveInlining)] public T? GetValue(int index) { - var span = GetSpan(); - return IsValid(index) ? span[index] : (T?) null; + return IsValid(index) ? Values[index] : (T?) null; } public IList ToList(bool includeNulls = false) { - var span = GetSpan(); + var span = Values; var list = new List(span.Length); for (var i = 0; i < span.Length; i++) diff --git a/csharp/src/Apache.Arrow/Arrays/StringArray.cs b/csharp/src/Apache.Arrow/Arrays/StringArray.cs index 3c8f8c0599527..9ea9522b2bdb9 100644 --- a/csharp/src/Apache.Arrow/Arrays/StringArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/StringArray.cs @@ -39,12 +39,12 @@ public string GetString(int index, Encoding encoding = default) { encoding = encoding ?? 
Encoding.UTF8; - var value = GetValue(index); + var bytes = GetBytes(index); unsafe { - fixed (byte* data = &MemoryMarshal.GetReference(value)) - return encoding.GetString(data, value.Length); + fixed (byte* data = &MemoryMarshal.GetReference(bytes)) + return encoding.GetString(data, bytes.Length); } } } diff --git a/csharp/src/Apache.Arrow/Arrays/TimestampArray.cs b/csharp/src/Apache.Arrow/Arrays/TimestampArray.cs index 174f6500a1058..f9fd0aec275d2 100644 --- a/csharp/src/Apache.Arrow/Arrays/TimestampArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/TimestampArray.cs @@ -38,14 +38,12 @@ public TimestampArray(ArrayData data) public DateTimeOffset? GetTimestamp(int index) { - var span = GetSpan(); - if (IsNull(index)) { return null; } - var value = span[index]; + var value = Values[index]; var type = Data.DataType as TimestampType; switch (type.Unit) @@ -60,7 +58,7 @@ public TimestampArray(ArrayData data) return DateTimeOffset.FromUnixTimeSeconds(value); default: throw new InvalidDataException( - string.Format("Unsupported timestamp unit <{0}>", type.Unit)); + $"Unsupported timestamp unit <{type.Unit}>"); } } } diff --git a/csharp/src/Apache.Arrow/Arrays/UnionArray.cs b/csharp/src/Apache.Arrow/Arrays/UnionArray.cs index 7ba7f9f5b8e27..8bccea2b59e31 100644 --- a/csharp/src/Apache.Arrow/Arrays/UnionArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/UnionArray.cs @@ -28,9 +28,9 @@ public class UnionArray: Array public ArrowBuffer ValueOffsetBuffer => Data.Buffers[2]; - public ReadOnlySpan TypeIds => TypeBuffer.GetSpan(); + public ReadOnlySpan TypeIds => TypeBuffer.Span; - public ReadOnlySpan ValueOffsets => ValueOffsetBuffer.GetSpan(); + public ReadOnlySpan ValueOffsets => ValueOffsetBuffer.Span.CastTo().Slice(0, Length + 1); public UnionArray(ArrayData data) : base(data) diff --git a/csharp/src/Apache.Arrow/ArrowBuffer.Builder.cs b/csharp/src/Apache.Arrow/ArrowBuffer.Builder.cs index a85fa2dc9d949..7ab26fac95aa3 100644 --- a/csharp/src/Apache.Arrow/ArrowBuffer.Builder.cs +++ b/csharp/src/Apache.Arrow/ArrowBuffer.Builder.cs @@ -17,70 +17,147 @@ using System; using System.Collections.Generic; using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; namespace Apache.Arrow { - public partial class ArrowBuffer + public partial struct ArrowBuffer { - /// - /// Builds an Arrow buffer from primitive values. 
- /// - /// Primitive type public class Builder where T : struct { private readonly int _size; - private readonly MemoryPool _pool; - private Memory _memory; - private int _offset; + private byte[] _buffer; - public Builder(int initialCapacity = 8, MemoryPool pool = default) - { - if (initialCapacity <= 0) initialCapacity = 1; - if (pool == null) pool = DefaultMemoryPool.Instance.Value; + public int Capacity => _buffer.Length / _size; + public int Length { get; private set; } + public Builder(int capacity = 8) + { _size = Unsafe.SizeOf(); - _pool = pool; - _memory = _pool.Allocate(initialCapacity * _size); + _buffer = new byte[capacity * _size]; + + Length = 0; + } + + public Builder Append(ArrowBuffer buffer) + { + Append(buffer.Span.CastTo()); + return this; } public Builder Append(T value) { - var span = GetSpan(); + var span = EnsureCapacity(1); + span[Length++] = value; + return this; + } + + public Builder Append(ReadOnlySpan source) + { + var span = EnsureCapacity(source.Length); + source.CopyTo(span.Slice(Length, source.Length)); + Length += source.Length; + return this; + } - if (_offset + 1 >= span.Length) + public Builder Append(Func> fn) + { + if (fn != null) { - // TODO: Consider a specifiable growth strategy + AppendRange(fn()); + } - _memory = _pool.Reallocate(_memory, (_memory.Length * 3) / 2); + return this; + } + + public Builder AppendRange(IEnumerable values) + { + if (values != null) + { + foreach (var v in values) + { + Append(v); + } } - span[_offset++] = value; return this; } - public Builder Set(int index, T value) + public Builder Reserve(int capacity) { - var span = GetSpan(); - span[index] = value; + EnsureCapacity(capacity); + return this; + } + + public Builder Resize(int capacity) + { + if (capacity < 0) + { + throw new ArgumentOutOfRangeException(nameof(capacity)); + } + + Reallocate(capacity); + Length = Math.Min(Length, capacity); + return this; } public Builder Clear() { - var span = GetSpan(); - span.Fill(default); + Span.Fill(default); + Length = 0; return this; } - public ArrowBuffer Build() + public ArrowBuffer Build(MemoryPool pool = default) + { + var length = BitUtility.RoundUpToMultipleOf64(_buffer.Length); + var memoryPool = pool ?? MemoryPool.Default.Value; + var memory = memoryPool.Allocate(length); + + Memory.CopyTo(memory); + + return new ArrowBuffer(memory); + } + + private Span EnsureCapacity(int len) { - return new ArrowBuffer(_memory, _offset); + var targetCapacity = Length + len; + + if (targetCapacity > Capacity) + { + // TODO: specifiable growth strategy + + var capacity = Math.Max( + targetCapacity * _size, _buffer.Length * 2); + + Reallocate(capacity); + } + + return Span; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private Span GetSpan() => MemoryMarshal.Cast(_memory.Span); + private void Reallocate(int length) + { + if (length < 0) + { + throw new ArgumentOutOfRangeException(nameof(length)); + } + + if (length != 0) + { + System.Array.Resize(ref _buffer, length); + } + } + + private Memory Memory => _buffer; + + private Span Span + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => Memory.Span.CastTo(); + } } + } } diff --git a/csharp/src/Apache.Arrow/ArrowBuffer.cs b/csharp/src/Apache.Arrow/ArrowBuffer.cs index ec2c3cbe8c123..8901ff93da55c 100644 --- a/csharp/src/Apache.Arrow/ArrowBuffer.cs +++ b/csharp/src/Apache.Arrow/ArrowBuffer.cs @@ -13,116 +13,43 @@ // See the License for the specific language governing permissions and // limitations under the License. 
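Note: the `ArrowBuffer.Builder` rewrite above drops the pooled `Memory<T>` backing in favor of a plain managed `byte[]`, deferring pool allocation to `Build()`, which pads the final copy to a multiple of 64 bytes. A minimal usage sketch, assuming the stripped signature is `Builder<T> where T : struct`:

```csharp
// Sketch only: chainable appends over a managed byte[]; pooled,
// 64-byte-padded memory is allocated once, at Build().
var buffer = new ArrowBuffer.Builder<int>(capacity: 4)
    .Append(1)                        // single value
    .Append(new[] { 2, 3 })           // ReadOnlySpan<int> overload
    .AppendRange(new[] { 4, 5, 6 })   // IEnumerable<int> overload
    .Build();                         // copies into MemoryPool.Default
```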
-using Apache.Arrow.Memory; using System; -using System.Buffers; -using System.IO; -using System.Runtime.InteropServices; -using System.Threading; -using System.Threading.Tasks; +using System.Runtime.CompilerServices; +using Apache.Arrow.Memory; namespace Apache.Arrow { - public partial class ArrowBuffer: IEquatable + public readonly partial struct ArrowBuffer: IEquatable { - public ArrowBuffer(Memory data, int size) + public static ArrowBuffer Empty => new ArrowBuffer(Memory.Empty); + + private ArrowBuffer(Memory data) { Memory = data; - Size = size; } - /// - /// Allocates an Arrow buffer from a memory pool. - /// - /// Size of buffer (in bytes) to allocate. - /// Memory pool to use for allocation. If null, a default memory pool is used. - /// - public static ArrowBuffer Allocate(int size, MemoryPool memoryPool = null) - { - if (memoryPool == null) - memoryPool = DefaultMemoryPool.Instance.Value; + public ReadOnlyMemory Memory { get; } - var buffer = memoryPool.Allocate(size); + public bool IsEmpty => Memory.IsEmpty; - return new ArrowBuffer(buffer, size); - } + public int Length => Memory.Length; - /// - /// Allocates an Arrow buffer the same length as the incoming data, then - /// copies the specified data to the arrow buffer. - /// - /// Data to copy into a new arrow buffer. - /// Memory pool to use for allocation. If null, a default memory pool is used. - /// - public static ArrowBuffer FromMemory(Memory data, MemoryPool memoryPool = default) + public ReadOnlySpan Span { - var buffer = Allocate(data.Length, memoryPool); - data.CopyTo(buffer.Memory); - return buffer; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => Memory.Span; } - public async Task CopyToAsync(Stream stream, CancellationToken cancellationToken = default) + public ArrowBuffer Clone(MemoryPool pool = default) { - const float chunkSize = 8192f; - - // TODO: Is there a better copy mechanism to use here that does not involve allocating buffers and targets .NET Standard 1.3? 
- // NOTE: Consider specialization for .NET Core 2.1 - - var length = Convert.ToInt32(chunkSize); - var buffer = ArrayPool.Shared.Rent(length); - var count = Convert.ToInt32(Math.Ceiling(Memory.Length / chunkSize)); - var offset = 0; - - try - { - for (var i = 0; i < count; i++) - { - var n = Math.Min(length, Memory.Length); - var slice = Memory.Slice(offset, n); - - slice.CopyTo(buffer); - - await stream.WriteAsync(buffer, 0, n, cancellationToken); - - offset += n; - } - } - finally - { - if (buffer != null) - { - ArrayPool.Shared.Return(buffer); - } - } + return new Builder(Span.Length) + .Append(Span) + .Build(pool); } - public Memory Memory { get; } - - public bool IsEmpty => Memory.IsEmpty; - - public int Size { get; } - - public int Capacity => Memory.Length; - - public Span GetSpan(int offset) - where T : struct => - MemoryMarshal.Cast( - Memory.Span.Slice(offset)); - - public Span GetSpan(int offset, int length) - where T : struct => - MemoryMarshal.Cast( - Memory.Span.Slice(offset, length)); - - public Span GetSpan() - where T: struct => - MemoryMarshal.Cast(Memory.Span); - public bool Equals(ArrowBuffer other) { - var lhs = GetSpan(); - var rhs = other.GetSpan(); - return lhs.SequenceEqual(rhs); + return Span.SequenceEqual(other.Span); } } } diff --git a/csharp/src/Apache.Arrow/BitUtility.cs b/csharp/src/Apache.Arrow/BitUtility.cs index ea5a556162e53..3b4ee7a43d268 100644 --- a/csharp/src/Apache.Arrow/BitUtility.cs +++ b/csharp/src/Apache.Arrow/BitUtility.cs @@ -20,8 +20,7 @@ namespace Apache.Arrow { public static class BitUtility { - private static readonly byte[] PopcountTable = new byte[] - { + private static readonly byte[] PopcountTable = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, @@ -32,8 +31,7 @@ public static class BitUtility 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, }; - private static readonly byte[] BitMask = new byte[] - { + private static readonly byte[] BitMask = { 1, 2, 4, 8, 16, 32, 64, 128 }; diff --git a/csharp/src/Apache.Arrow/Bitmap.cs b/csharp/src/Apache.Arrow/Bitmap.cs deleted file mode 100644 index 257438b323c7d..0000000000000 --- a/csharp/src/Apache.Arrow/Bitmap.cs +++ /dev/null @@ -1,75 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one or more -// contributor license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright ownership. -// The ASF licenses this file to You under the Apache License, Version 2.0 -// (the "License"); you may not use this file except in compliance with -// the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
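Note: with `ArrowBuffer` converted above to a `readonly struct` over `ReadOnlyMemory<byte>`, `Equals` compares span contents rather than references, and `ArrowBuffer.Empty` replaces `null` buffers; the `Bitmap` wrapper deleted below becomes redundant next to `BitUtility` over raw spans. A small sketch of the resulting semantics (variable names illustrative):

```csharp
// Sketch only: value semantics after the struct conversion.
var a = new ArrowBuffer.Builder<byte>()
    .Append((byte)0xAB)
    .Build();

var b = a.Clone();                        // deep copy via the builder
bool sameBytes = a.Equals(b);             // true: SequenceEqual over Span
bool empty = ArrowBuffer.Empty.IsEmpty;   // true: replaces null checks
```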
- -using Apache.Arrow.Memory; - -namespace Apache.Arrow -{ - public struct Bitmap - { - public ArrowBuffer Buffer { get; } - - public int Length => Buffer.Size; - - public Bitmap(ArrowBuffer buffer) - { - Buffer = buffer; - } - - public static implicit operator Bitmap(ArrowBuffer buffer) - { - return new Bitmap(buffer); - } - - public static implicit operator ArrowBuffer(Bitmap bitmap) - { - return bitmap.Buffer; - } - - public static Bitmap Allocate(int bitCount, MemoryPool memoryPool = default) - { - var size = bitCount / 8 + (bitCount % 8 > 0 ? 1 : 0); - var remainder = size % 64; - var len = (remainder == 0) ? size : size + 64 - remainder; - - // Allocate buffer from memory pool and enable all bits - - var buffer = ArrowBuffer.Allocate(len, memoryPool); - var span = buffer.GetSpan(); - - span.Fill(0xff); - - return new Bitmap(buffer); - } - - public void Clear(int index) - { - BitUtility.ClearBit( - Buffer.GetSpan(), index); - } - - public void Set(int index) - { - BitUtility.SetBit( - Buffer.GetSpan(), index); - } - - public bool IsSet(int index) - { - return BitUtility.GetBit( - Buffer.GetSpan(), index); - } - } -} diff --git a/csharp/src/Apache.Arrow/Memory/DefaultMemoryPool.cs b/csharp/src/Apache.Arrow/Extensions/SpanExtensions.cs similarity index 59% rename from csharp/src/Apache.Arrow/Memory/DefaultMemoryPool.cs rename to csharp/src/Apache.Arrow/Extensions/SpanExtensions.cs index bf6597918490e..b759f38060703 100644 --- a/csharp/src/Apache.Arrow/Memory/DefaultMemoryPool.cs +++ b/csharp/src/Apache.Arrow/Extensions/SpanExtensions.cs @@ -14,25 +14,18 @@ // limitations under the License. using System; -using System.Buffers; using System.Runtime.InteropServices; -namespace Apache.Arrow.Memory +namespace Apache.Arrow { - public class DefaultMemoryPool + public static class SpanExtensions { - public const int DefaultAlignment = 64; - public const int DefaultPadding = 8; - - public static readonly Lazy Instance = new Lazy(BuildDefault, true); - - private static MemoryPool BuildDefault() - { - // TODO: Replace the default memory pool instance with a platform-specific implementation - // of memory pool with fallback to this implementation? - - return new NativeMemoryPool(DefaultPadding, DefaultAlignment); - } + public static Span CastTo(this Span span) + where T: struct => + MemoryMarshal.Cast(span); + public static ReadOnlySpan CastTo(this ReadOnlySpan span) + where T: struct => + MemoryMarshal.Cast(span); } } diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowFileReader.cs b/csharp/src/Apache.Arrow/Ipc/ArrowFileReader.cs index c47eab5884d45..61c7627f0769b 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowFileReader.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowFileReader.cs @@ -13,14 +13,13 @@ // See the License for the specific language governing permissions and // limitations under the License. 
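Note: the `SpanExtensions` helpers above (in the file renamed from `DefaultMemoryPool.cs`) are thin wrappers over `MemoryMarshal.Cast`; with the stripped generics restored they presumably read `CastTo<T>(this ReadOnlySpan<byte> span) where T : struct => MemoryMarshal.Cast<byte, T>(span)`. A usage sketch:

```csharp
// Sketch only: reinterpret raw bytes as a typed span without copying.
ReadOnlySpan<byte> bytes = stackalloc byte[] { 1, 0, 0, 0, 2, 0, 0, 0 };
ReadOnlySpan<int> ints = bytes.CastTo<int>();
// ints[0] == 1 and ints[1] == 2 on a little-endian machine.
```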
+using FlatBuffers; using System; -using System.Buffers; using System.Buffers.Binary; using System.IO; -using System.Threading.Tasks; -using FlatBuffers; -using System.Threading; using System.Linq; +using System.Threading; +using System.Threading.Tasks; namespace Apache.Arrow.Ipc { diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowFileWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowFileWriter.cs index dac7b5bee21fa..98fbdf0be312d 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowFileWriter.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowFileWriter.cs @@ -16,7 +16,6 @@ using System; using System.Buffers.Binary; using System.Collections.Generic; -using System.Diagnostics; using System.IO; using System.Threading; using System.Threading.Tasks; @@ -38,6 +37,8 @@ public ArrowFileWriter(Stream stream, Schema schema) throw new ArgumentException("stream must be writable", nameof(stream)); } + // TODO: Remove seek requirement + if (!stream.CanSeek) { throw new ArgumentException("stream must be seekable", nameof(stream)); @@ -66,7 +67,7 @@ public override async Task WriteRecordBatchAsync(RecordBatch recordBatch, Cancel RecordBatchBlocks.Add(block); } - public async Task CloseAsync(CancellationToken cancellationToken = default) + public async Task WriteFooterAsync(CancellationToken cancellationToken = default) { if (!HasWrittenFooter) { @@ -77,19 +78,6 @@ public async Task CloseAsync(CancellationToken cancellationToken = default) await BaseStream.FlushAsync(cancellationToken); } - public override void Dispose() - { - try - { - CloseAsync().GetAwaiter().GetResult(); - } - catch(Exception ex) - { - // NOTE: Dispose shouldn't throw. - Debug.WriteLine(ex); - } - } - private async Task WriteHeaderAsync(CancellationToken cancellationToken) { cancellationToken.ThrowIfCancellationRequested(); @@ -144,10 +132,12 @@ private async Task WriteFooterAsync(Schema schema, CancellationToken cancellatio cancellationToken.ThrowIfCancellationRequested(); - Buffers.RentReturn(4, (buffer) => + await Buffers.RentReturnAsync(4, async (buffer) => { BinaryPrimitives.WriteInt32LittleEndian(buffer, Convert.ToInt32(BaseStream.Position - offset)); + + await BaseStream.WriteAsync(buffer, 0, 4, cancellationToken); }); // Write magic diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamReader.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamReader.cs index 18e254c805b0e..f6e1ca5d06971 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamReader.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamReader.cs @@ -219,11 +219,14 @@ private static ArrowBuffer BuildArrowBuffer(FlatBuffers.ByteBuffer bodyData, Fla { if (buffer.Length <= 0) { - return null; + return ArrowBuffer.Empty; } var segment = bodyData.ToArraySegment((int)buffer.Offset, (int)buffer.Length); - return ArrowBuffer.FromMemory(segment); + + return new ArrowBuffer.Builder(segment.Count) + .Append(segment) + .Build(); } private static ArrayData LoadPrimitiveField(Field field, diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs index 980a42dbf5f19..639c64a0363d3 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs @@ -60,16 +60,15 @@ public Buffer(ArrowBuffer buffer, int offset, int length) } private readonly List _buffers; - private int _offset; public IReadOnlyList Buffers => _buffers; - public int TotalLength => _offset; + public int TotalLength { get; private set; } public ArrowRecordBatchFlatBufferBuilder() { _buffers = new List(); - _offset = 0; + TotalLength = 0; } public 
void Visit(Int8Array array) => CreateBuffers(array); @@ -113,16 +112,11 @@ private void CreateBuffers(PrimitiveArray array) private Buffer CreateBuffer(ArrowBuffer buffer) { - if (buffer == null) - { - return new Buffer(null, _offset, 0); - } - - var offset = _offset; + var offset = TotalLength; - _offset += buffer.Capacity; + TotalLength += buffer.Length; - return new Buffer(buffer, offset, buffer.Capacity); + return new Buffer(buffer, offset, buffer.Length); } public void Visit(IArrowArray array) @@ -176,6 +170,8 @@ public ArrowStreamWriter(Stream baseStream, Schema schema) protected virtual async Task WriteRecordBatchInternalAsync(RecordBatch recordBatch, CancellationToken cancellationToken = default) { + // TODO: Truncate buffers with extraneous padding / unused capacity + if (!HasWrittenSchema) { await WriteSchemaAsync(Schema, cancellationToken); @@ -243,10 +239,11 @@ await WriteMessageAsync(Flatbuf.MessageHeader.RecordBatch, for (var i = 0; i < buffers.Count; i++) { - if (buffers[i].DataBuffer == null) + if (buffers[i].DataBuffer.IsEmpty) continue; - await buffers[i].DataBuffer.CopyToAsync(BaseStream, cancellationToken); + + await WriteBufferAsync(buffers[i].DataBuffer, cancellationToken); } // Write padding so the record batch message body length is a multiple of 8 bytes @@ -257,7 +254,7 @@ await WriteMessageAsync(Flatbuf.MessageHeader.RecordBatch, await WritePaddingAsync(bodyPaddingLength); return new Block( - offset: Convert.ToInt32(metadataOffset), + offset: Convert.ToInt32(metadataOffset), length: bodyLength + bodyPaddingLength, metadataLength: Convert.ToInt32(metadataLength)); } @@ -266,6 +263,22 @@ public virtual Task WriteRecordBatchAsync(RecordBatch recordBatch, CancellationT { return WriteRecordBatchInternalAsync(recordBatch, cancellationToken); } + + public Task WriteBufferAsync(ArrowBuffer arrowBuffer, CancellationToken cancellationToken = default) + { + byte[] buffer = null; + try + { + var span = arrowBuffer.Span; + buffer = ArrayPool.Shared.Rent(span.Length); + span.CopyTo(buffer); + return BaseStream.WriteAsync(buffer, 0, buffer.Length, cancellationToken); + } + finally + { + ArrayPool.Shared.Return(buffer); + } + } protected Offset SerializeSchema(Schema schema) { diff --git a/csharp/src/Apache.Arrow/Memory/MemoryPool.cs b/csharp/src/Apache.Arrow/Memory/MemoryPool.cs index 1e2c173975b2b..569ca7439f83b 100644 --- a/csharp/src/Apache.Arrow/Memory/MemoryPool.cs +++ b/csharp/src/Apache.Arrow/Memory/MemoryPool.cs @@ -21,6 +21,10 @@ namespace Apache.Arrow.Memory public abstract class MemoryPool { + public const int DefaultAlignment = 64; + + public static Lazy Default { get; } = new Lazy(BuildDefault, true); + public class Stats { private long _bytesAllocated; @@ -38,9 +42,12 @@ internal void Allocate(int n) public Stats Statistics { get; } - protected MemoryPool() + protected int Alignment { get; } + + protected MemoryPool(int alignment = DefaultAlignment) { Statistics = new Stats(); + Alignment = alignment; } public Memory Allocate(int length) @@ -50,14 +57,18 @@ public Memory Allocate(int length) throw new ArgumentOutOfRangeException(nameof(length)); } - var bytesAllocated = 0; - var memory = AllocateInternal(length, out bytesAllocated); + if (length == 0) + { + return Memory.Empty; + } + + var memory = AllocateInternal(length, out var bytesAllocated); - Statistics.Allocate(length); + Statistics.Allocate(bytesAllocated); // Ensure all allocated memory is zeroed. 
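Note: a caution on the new `WriteBufferAsync` in `ArrowStreamWriter` above: `ArrayPool.Shared.Rent(n)` may return an array longer than `n`, so writing `buffer.Length` bytes can emit trailing garbage, and the method returns the pending `Task` while the `finally` block hands the rented array back to the pool mid-write. A safer shape, offered as a suggestion rather than what this patch does:

```csharp
// Sketch only: await before returning the rented array to the pool,
// and write exactly the payload length, not the array length.
public async Task WriteBufferAsync(ArrowBuffer arrowBuffer,
    CancellationToken cancellationToken = default)
{
    var length = arrowBuffer.Span.Length;
    var buffer = ArrayPool<byte>.Shared.Rent(length);
    try
    {
        arrowBuffer.Span.CopyTo(buffer);
        await BaseStream.WriteAsync(buffer, 0, length, cancellationToken);
    }
    finally
    {
        ArrayPool<byte>.Shared.Return(buffer);
    }
}
```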
- ZeroMemory(memory); + ZeroMemory(memory.Span); return memory; } @@ -69,18 +80,32 @@ public Memory Reallocate(Memory memory, int length) throw new ArgumentOutOfRangeException(nameof(length)); } - var bytesAllocated = 0; - var buffer = ReallocateInternal(memory, length, out bytesAllocated); + if (length == 0) + { + return Memory.Empty; + } + + var buffer = ReallocateInternal(memory, length, out var bytesAllocated); Statistics.Allocate(bytesAllocated); + if (length > memory.Length) + { + ZeroMemory(buffer.Span.Slice( + memory.Length, length - memory.Length)); + } + return buffer; + } + private static void ZeroMemory(Span span) + { + span.Fill(0); } - private static void ZeroMemory(Memory memory) + private static MemoryPool BuildDefault() { - memory.Span.Fill(0); + return new NativeMemoryPool(DefaultAlignment); } protected abstract Memory AllocateInternal(int length, out int bytesAllocated); diff --git a/csharp/src/Apache.Arrow/Memory/NativeMemory.cs b/csharp/src/Apache.Arrow/Memory/NativeMemory.cs index c5e29aa2a2318..a188f453eaf02 100644 --- a/csharp/src/Apache.Arrow/Memory/NativeMemory.cs +++ b/csharp/src/Apache.Arrow/Memory/NativeMemory.cs @@ -25,8 +25,8 @@ namespace Apache.Arrow.Memory public class NativeMemoryManager: MemoryManager { private IntPtr _ptr; - private int _offset; - private int _length; + private readonly int _offset; + private readonly int _length; public NativeMemoryManager(IntPtr ptr, int offset, int length) { @@ -40,13 +40,13 @@ public NativeMemoryManager(IntPtr ptr, int offset, int length) Dispose(false); } - public unsafe override Span GetSpan() + public override unsafe Span GetSpan() { var ptr = CalculatePointer(0); return new Span(ptr, _length); } - public unsafe override MemoryHandle Pin(int elementIndex = 0) + public override unsafe MemoryHandle Pin(int elementIndex = 0) { // NOTE: Unmanaged memory doesn't require GC pinning because by definition it's not // managed by the garbage collector. diff --git a/csharp/src/Apache.Arrow/Memory/NativeMemoryPool.cs b/csharp/src/Apache.Arrow/Memory/NativeMemoryPool.cs index 9413951c39161..2ea07ce451653 100644 --- a/csharp/src/Apache.Arrow/Memory/NativeMemoryPool.cs +++ b/csharp/src/Apache.Arrow/Memory/NativeMemoryPool.cs @@ -20,19 +20,8 @@ namespace Apache.Arrow.Memory { public class NativeMemoryPool : MemoryPool { - private readonly int _padding; - private readonly int _alignment; - - public NativeMemoryPool(int padding, int alignment) - { - if (padding < 0) throw new ArgumentOutOfRangeException(nameof(padding)); - if (alignment < 0) throw new ArgumentOutOfRangeException(nameof(alignment)); - - // TODO: Ensure alignment is a power of two. - - _padding = padding; - _alignment = alignment; - } + public NativeMemoryPool(int alignment = DefaultAlignment) + : base(alignment) { } protected override Memory AllocateInternal(int length, out int bytesAllocated) { @@ -42,14 +31,13 @@ protected override Memory AllocateInternal(int length, out int bytesAlloca // to allocated memory, offset, and the allocation size. // TODO: Should the allocation be moved to NativeMemory? 
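Note: the `NativeMemoryPool` hunk just below swaps the padding-based sizing for a simpler over-allocate-and-offset scheme: allocate `length + Alignment` bytes, then advance the start of the usable region to the next `Alignment`-byte boundary. The bit trick requires `Alignment` to be a power of two; a worked example (the address is made up):

```csharp
// For a power-of-two alignment A, (ptr & (A - 1)) == ptr % A, so
// A - (ptr & (A - 1)) is the distance to the next A-byte boundary.
long ptr = 0x7F3A_2481;   // example raw address from AllocHGlobal
const int Alignment = 64;
int offset = (int)(Alignment - (ptr & (Alignment - 1)));
// (ptr + offset) % 64 == 0. When ptr is already aligned the offset is
// a full 64 bytes, which wastes one stride but stays within the block.
```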
- - var size = BitUtility.RoundUpToMultiplePowerOfTwo(length, _padding); - var ptr = Marshal.AllocHGlobal(size + _alignment); - var offset = (int)(_alignment - (ptr.ToInt64() & (_alignment - 1))); - - var manager = new NativeMemoryManager(ptr, offset, size); - bytesAllocated = (size + _alignment); + var size = length + Alignment; + var ptr = Marshal.AllocHGlobal(size); + var offset = (int)(Alignment - (ptr.ToInt64() & (Alignment - 1))); + var manager = new NativeMemoryManager(ptr, offset, length); + + bytesAllocated = (length + Alignment); GC.AddMemoryPressure(bytesAllocated); diff --git a/csharp/src/Apache.Arrow/Types/ArrowType.cs b/csharp/src/Apache.Arrow/Types/ArrowType.cs index 9e4b3608b9771..c0eca23da55ac 100644 --- a/csharp/src/Apache.Arrow/Types/ArrowType.cs +++ b/csharp/src/Apache.Arrow/Types/ArrowType.cs @@ -13,6 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. + namespace Apache.Arrow.Types { public abstract class ArrowType: IArrowType @@ -24,5 +25,19 @@ public abstract class ArrowType: IArrowType public virtual bool IsFixedWidth => false; public abstract void Accept(IArrowTypeVisitor visitor); + + internal static void Accept(T type, IArrowTypeVisitor visitor) + where T: class, IArrowType + { + switch (visitor) + { + case IArrowTypeVisitor typedVisitor: + typedVisitor.Visit(type); + break; + default: + visitor.Visit(type); + break; + } + } } } diff --git a/csharp/src/Apache.Arrow/Types/BinaryType.cs b/csharp/src/Apache.Arrow/Types/BinaryType.cs index 8ae753360c0b3..6734d93ad2e7a 100644 --- a/csharp/src/Apache.Arrow/Types/BinaryType.cs +++ b/csharp/src/Apache.Arrow/Types/BinaryType.cs @@ -13,9 +13,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -using System; -using System.Collections.Generic; -using System.Text; namespace Apache.Arrow.Types { @@ -26,10 +23,6 @@ public class BinaryType: ArrowType public override ArrowTypeId TypeId => ArrowTypeId.Binary; public override string Name => "binary"; - public override void Accept(IArrowTypeVisitor visitor) - { - if (visitor is IArrowTypeVisitor v) - v.Visit(this); - } + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); } } diff --git a/csharp/src/Apache.Arrow/Types/BooleanType.cs b/csharp/src/Apache.Arrow/Types/BooleanType.cs index 5a26c879f5fa8..3b57414b0179f 100644 --- a/csharp/src/Apache.Arrow/Types/BooleanType.cs +++ b/csharp/src/Apache.Arrow/Types/BooleanType.cs @@ -13,13 +13,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
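Note: every concrete type in the hunks that follow becomes `sealed` and collapses its override to `Accept(this, visitor)`, dispatching through the generic helper added to `ArrowType` above (the type-side twin of the `Array.cs` helper shown earlier). From the consumer side, a visitor opts into typed dispatch one type at a time; a sketch with an illustrative visitor:

```csharp
// Sketch only: WidthVisitor is hypothetical, not part of this patch.
class WidthVisitor : IArrowTypeVisitor, IArrowTypeVisitor<Int32Type>
{
    public int Bits = -1;
    public void Visit(Int32Type type) => Bits = type.BitWidth; // typed path
    public void Visit(IArrowType type) { }                     // fallback
}

// Usage: Int32Type.Default.Accept(v) hits the typed path (Bits == 32);
// StringType.Default.Accept(v) falls back and leaves Bits untouched.
```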
-using System; -using System.Collections.Generic; -using System.Text; namespace Apache.Arrow.Types { - public class BooleanType: NumberType + public sealed class BooleanType: NumberType { public static readonly BooleanType Default = new BooleanType(); @@ -28,10 +25,6 @@ public class BooleanType: NumberType public override int BitWidth => 1; public override bool IsSigned => false; - public override void Accept(IArrowTypeVisitor visitor) - { - if (visitor is IArrowTypeVisitor v) - v.Visit(this); - } + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); } } diff --git a/csharp/src/Apache.Arrow/Types/Date32Type.cs b/csharp/src/Apache.Arrow/Types/Date32Type.cs index 19e6823602a6b..9673bf62ff6e7 100644 --- a/csharp/src/Apache.Arrow/Types/Date32Type.cs +++ b/csharp/src/Apache.Arrow/Types/Date32Type.cs @@ -13,13 +13,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -using System; -using System.Collections.Generic; -using System.Text; namespace Apache.Arrow.Types { - public class Date32Type: DateType + public sealed class Date32Type: DateType { public static readonly Date32Type Default = new Date32Type(); @@ -28,10 +25,6 @@ public class Date32Type: DateType public override int BitWidth => 32; public override DateUnit Unit => DateUnit.Day; - public override void Accept(IArrowTypeVisitor visitor) - { - if (visitor is IArrowTypeVisitor v) - v.Visit(this); - } + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); } } diff --git a/csharp/src/Apache.Arrow/Types/Date64Type.cs b/csharp/src/Apache.Arrow/Types/Date64Type.cs index d301ca4130d56..2a9e1aac0d6ea 100644 --- a/csharp/src/Apache.Arrow/Types/Date64Type.cs +++ b/csharp/src/Apache.Arrow/Types/Date64Type.cs @@ -13,13 +13,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -using System; -using System.Collections.Generic; -using System.Text; namespace Apache.Arrow.Types { - public class Date64Type: DateType + public sealed class Date64Type : DateType { public static readonly Date64Type Default = new Date64Type(); @@ -28,10 +25,6 @@ public class Date64Type: DateType public override int BitWidth => 64; public override DateUnit Unit => DateUnit.Milliseconds; - public override void Accept(IArrowTypeVisitor visitor) - { - if (visitor is IArrowTypeVisitor v) - v.Visit(this); - } + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); } } diff --git a/csharp/src/Apache.Arrow/Types/DateType.cs b/csharp/src/Apache.Arrow/Types/DateType.cs index 1fa2a32294b9f..8f15b08fc88e5 100644 --- a/csharp/src/Apache.Arrow/Types/DateType.cs +++ b/csharp/src/Apache.Arrow/Types/DateType.cs @@ -13,9 +13,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-using System; -using System.Collections.Generic; -using System.Text; namespace Apache.Arrow.Types { diff --git a/csharp/src/Apache.Arrow/Types/DecimalType.cs b/csharp/src/Apache.Arrow/Types/DecimalType.cs index adb9a06d1a1b5..cad2e2428727b 100644 --- a/csharp/src/Apache.Arrow/Types/DecimalType.cs +++ b/csharp/src/Apache.Arrow/Types/DecimalType.cs @@ -15,7 +15,7 @@ namespace Apache.Arrow.Types { - public class DecimalType: FixedSizeBinaryType + public sealed class DecimalType: FixedSizeBinaryType { public override ArrowTypeId TypeId => ArrowTypeId.Decimal; public override string Name => "decimal"; diff --git a/csharp/src/Apache.Arrow/Types/DoubleType.cs b/csharp/src/Apache.Arrow/Types/DoubleType.cs index 9fb0969130cc2..aa6ade650ef89 100644 --- a/csharp/src/Apache.Arrow/Types/DoubleType.cs +++ b/csharp/src/Apache.Arrow/Types/DoubleType.cs @@ -13,13 +13,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -using System; -using System.Collections.Generic; -using System.Text; namespace Apache.Arrow.Types { - public class DoubleType: FloatingPointType + public sealed class DoubleType: FloatingPointType { public static readonly DoubleType Default = new DoubleType(); @@ -29,10 +26,6 @@ public class DoubleType: FloatingPointType public override bool IsSigned => true; public override PrecisionKind Precision => PrecisionKind.Double; - public override void Accept(IArrowTypeVisitor visitor) - { - if (visitor is IArrowTypeVisitor v) - v.Visit(this); - } + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); } } diff --git a/csharp/src/Apache.Arrow/Types/FixedSizeBinaryType.cs b/csharp/src/Apache.Arrow/Types/FixedSizeBinaryType.cs index 6e16730314ffa..ccbfc8c1fe7f6 100644 --- a/csharp/src/Apache.Arrow/Types/FixedSizeBinaryType.cs +++ b/csharp/src/Apache.Arrow/Types/FixedSizeBinaryType.cs @@ -14,12 +14,10 @@ // limitations under the License. using System; -using System.Collections.Generic; -using System.Text; namespace Apache.Arrow.Types { - public class FixedSizeBinaryType: FixedWidthType + public class FixedSizeBinaryType : FixedWidthType { public override ArrowTypeId TypeId => ArrowTypeId.FixedSizedBinary; public override string Name => "fixed_size_binary"; @@ -34,12 +32,7 @@ public FixedSizeBinaryType(int byteWidth) ByteWidth = byteWidth; } - public override void Accept(IArrowTypeVisitor visitor) - { - if (visitor is IArrowTypeVisitor v) - v.Visit(this); - } + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); - } } diff --git a/csharp/src/Apache.Arrow/Types/FixedWidthType.cs b/csharp/src/Apache.Arrow/Types/FixedWidthType.cs index 72b46a2b117e2..d1c9e8c1d8b8d 100644 --- a/csharp/src/Apache.Arrow/Types/FixedWidthType.cs +++ b/csharp/src/Apache.Arrow/Types/FixedWidthType.cs @@ -13,9 +13,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -using System; -using System.Collections.Generic; -using System.Text; namespace Apache.Arrow.Types { diff --git a/csharp/src/Apache.Arrow/Types/FloatType.cs b/csharp/src/Apache.Arrow/Types/FloatType.cs index c4232931d01e5..a3f7b39bf49ff 100644 --- a/csharp/src/Apache.Arrow/Types/FloatType.cs +++ b/csharp/src/Apache.Arrow/Types/FloatType.cs @@ -13,13 +13,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-using System; -using System.Collections.Generic; -using System.Text; namespace Apache.Arrow.Types { - public class FloatType: FloatingPointType + public sealed class FloatType: FloatingPointType { public static readonly FloatType Default = new FloatType(); @@ -29,10 +26,6 @@ public class FloatType: FloatingPointType public override bool IsSigned => true; public override PrecisionKind Precision => PrecisionKind.Single; - public override void Accept(IArrowTypeVisitor visitor) - { - if (visitor is IArrowTypeVisitor v) - v.Visit(this); - } + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); } } diff --git a/csharp/src/Apache.Arrow/Types/FloatingPointType.cs b/csharp/src/Apache.Arrow/Types/FloatingPointType.cs index 5f667c72226ae..9fbe43a99b6b2 100644 --- a/csharp/src/Apache.Arrow/Types/FloatingPointType.cs +++ b/csharp/src/Apache.Arrow/Types/FloatingPointType.cs @@ -13,9 +13,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -using System; -using System.Collections.Generic; -using System.Text; namespace Apache.Arrow.Types { diff --git a/csharp/src/Apache.Arrow/Types/HalfFloatType.cs b/csharp/src/Apache.Arrow/Types/HalfFloatType.cs index 22f1370af5f54..5bfa232dc18d5 100644 --- a/csharp/src/Apache.Arrow/Types/HalfFloatType.cs +++ b/csharp/src/Apache.Arrow/Types/HalfFloatType.cs @@ -13,13 +13,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -using System; -using System.Collections.Generic; -using System.Text; namespace Apache.Arrow.Types { - public class HalfFloatType: FloatingPointType + public sealed class HalfFloatType: FloatingPointType { public static readonly HalfFloatType Default = new HalfFloatType(); @@ -29,10 +26,6 @@ public class HalfFloatType: FloatingPointType public override bool IsSigned => true; public override PrecisionKind Precision => PrecisionKind.Half; - public override void Accept(IArrowTypeVisitor visitor) - { - if (visitor is IArrowTypeVisitor v) - v.Visit(this); - } + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); } } diff --git a/csharp/src/Apache.Arrow/Types/IArrowType.cs b/csharp/src/Apache.Arrow/Types/IArrowType.cs index d75be542a9237..578e18b9dedf5 100644 --- a/csharp/src/Apache.Arrow/Types/IArrowType.cs +++ b/csharp/src/Apache.Arrow/Types/IArrowType.cs @@ -13,9 +13,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -using System; -using System.Collections.Generic; -using System.Text; namespace Apache.Arrow.Types { diff --git a/csharp/src/Apache.Arrow/Types/IArrowTypeVisitor.cs b/csharp/src/Apache.Arrow/Types/IArrowTypeVisitor.cs index 3cd602ba18863..ce5b114bf3178 100644 --- a/csharp/src/Apache.Arrow/Types/IArrowTypeVisitor.cs +++ b/csharp/src/Apache.Arrow/Types/IArrowTypeVisitor.cs @@ -13,9 +13,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-using System; -using System.Collections.Generic; -using System.Text; namespace Apache.Arrow.Types { diff --git a/csharp/src/Apache.Arrow/Types/Int16Type.cs b/csharp/src/Apache.Arrow/Types/Int16Type.cs index 3a7edbe0bd5a7..f1d6868ba8ae1 100644 --- a/csharp/src/Apache.Arrow/Types/Int16Type.cs +++ b/csharp/src/Apache.Arrow/Types/Int16Type.cs @@ -15,7 +15,7 @@ namespace Apache.Arrow.Types { - public class Int16Type : NumberType + public sealed class Int16Type : NumberType { public static readonly Int16Type Default = new Int16Type(); @@ -24,10 +24,6 @@ public class Int16Type : NumberType public override int BitWidth => 16; public override bool IsSigned => true; - public override void Accept(IArrowTypeVisitor visitor) - { - if (visitor is IArrowTypeVisitor v) - v.Visit(this); - } + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); } } \ No newline at end of file diff --git a/csharp/src/Apache.Arrow/Types/Int32Type.cs b/csharp/src/Apache.Arrow/Types/Int32Type.cs index e8df522679a0f..a32c884629831 100644 --- a/csharp/src/Apache.Arrow/Types/Int32Type.cs +++ b/csharp/src/Apache.Arrow/Types/Int32Type.cs @@ -15,7 +15,7 @@ namespace Apache.Arrow.Types { - public class Int32Type : NumberType + public sealed class Int32Type : NumberType { public static readonly Int32Type Default = new Int32Type(); @@ -24,10 +24,6 @@ public class Int32Type : NumberType public override int BitWidth => 32; public override bool IsSigned => true; - public override void Accept(IArrowTypeVisitor visitor) - { - if (visitor is IArrowTypeVisitor v) - v.Visit(this); - } + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); } } \ No newline at end of file diff --git a/csharp/src/Apache.Arrow/Types/Int64Type.cs b/csharp/src/Apache.Arrow/Types/Int64Type.cs index afdf4098861de..f45523cfb3303 100644 --- a/csharp/src/Apache.Arrow/Types/Int64Type.cs +++ b/csharp/src/Apache.Arrow/Types/Int64Type.cs @@ -15,7 +15,7 @@ namespace Apache.Arrow.Types { - public class Int64Type : NumberType + public sealed class Int64Type : NumberType { public static readonly Int64Type Default = new Int64Type(); @@ -24,10 +24,6 @@ public class Int64Type : NumberType public override int BitWidth => 64; public override bool IsSigned => true; - public override void Accept(IArrowTypeVisitor visitor) - { - if (visitor is IArrowTypeVisitor v) - v.Visit(this); - } + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); } } \ No newline at end of file diff --git a/csharp/src/Apache.Arrow/Types/Int8Type.cs b/csharp/src/Apache.Arrow/Types/Int8Type.cs index 9687cd349cc3d..9b3f5b5b4fc96 100644 --- a/csharp/src/Apache.Arrow/Types/Int8Type.cs +++ b/csharp/src/Apache.Arrow/Types/Int8Type.cs @@ -13,13 +13,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-using System; -using System.Collections.Generic; -using System.Text; namespace Apache.Arrow.Types { - public class Int8Type: NumberType + public sealed class Int8Type : NumberType { public static readonly Int8Type Default = new Int8Type(); @@ -28,10 +25,6 @@ public class Int8Type: NumberType public override int BitWidth => 8; public override bool IsSigned => true; - public override void Accept(IArrowTypeVisitor visitor) - { - if (visitor is IArrowTypeVisitor v) - v.Visit(this); - } + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); } } diff --git a/csharp/src/Apache.Arrow/Types/IntervalUnit.cs b/csharp/src/Apache.Arrow/Types/IntervalUnit.cs index e287548ef6206..6dda0cfe94b78 100644 --- a/csharp/src/Apache.Arrow/Types/IntervalUnit.cs +++ b/csharp/src/Apache.Arrow/Types/IntervalUnit.cs @@ -13,9 +13,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -using System; -using System.Collections.Generic; -using System.Text; namespace Apache.Arrow.Types { @@ -25,7 +22,7 @@ public enum IntervalUnit DayTime = 1 } - public class IntervalType: FixedWidthType + public sealed class IntervalType : FixedWidthType { public override ArrowTypeId TypeId => ArrowTypeId.Interval; public override string Name => "date"; @@ -38,10 +35,6 @@ public IntervalType(IntervalUnit unit = IntervalUnit.YearMonth) Unit = unit; } - public override void Accept(IArrowTypeVisitor visitor) - { - if (visitor is IArrowTypeVisitor v) - v.Visit(this); - } + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); } } diff --git a/csharp/src/Apache.Arrow/Types/ListType.cs b/csharp/src/Apache.Arrow/Types/ListType.cs index fa5c87cd4eb56..5d48a610feab1 100644 --- a/csharp/src/Apache.Arrow/Types/ListType.cs +++ b/csharp/src/Apache.Arrow/Types/ListType.cs @@ -14,12 +14,10 @@ // limitations under the License. using System; -using System.Collections.Generic; -using System.Text; namespace Apache.Arrow.Types { - public class ListType: ArrowType + public sealed class ListType : ArrowType { public override ArrowTypeId TypeId => ArrowTypeId.List; public override string Name => "list"; @@ -33,10 +31,6 @@ public ListType(Field valueField, IArrowType valueDataType) ValueDataType = valueDataType ?? NullType.Default; } - public override void Accept(IArrowTypeVisitor visitor) - { - if (visitor is IArrowTypeVisitor v) - v.Visit(this); - } + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); } } diff --git a/csharp/src/Apache.Arrow/Types/NullType.cs b/csharp/src/Apache.Arrow/Types/NullType.cs index b299ed8c93836..4afe1dc38a300 100644 --- a/csharp/src/Apache.Arrow/Types/NullType.cs +++ b/csharp/src/Apache.Arrow/Types/NullType.cs @@ -13,23 +13,16 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-using System; -using System.Collections.Generic; -using System.Text; namespace Apache.Arrow.Types { - public class NullType: ArrowType + public sealed class NullType : ArrowType { public static readonly NullType Default = new NullType(); public override ArrowTypeId TypeId => ArrowTypeId.Null; public override string Name => "null"; - public override void Accept(IArrowTypeVisitor visitor) - { - if (visitor is IArrowTypeVisitor v) - v.Visit(this); - } + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); } } diff --git a/csharp/src/Apache.Arrow/Types/NumberType.cs b/csharp/src/Apache.Arrow/Types/NumberType.cs index a80bd44fe4312..04d21bc8cfb1b 100644 --- a/csharp/src/Apache.Arrow/Types/NumberType.cs +++ b/csharp/src/Apache.Arrow/Types/NumberType.cs @@ -13,9 +13,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -using System; -using System.Collections.Generic; -using System.Text; namespace Apache.Arrow.Types { diff --git a/csharp/src/Apache.Arrow/Types/StringType.cs b/csharp/src/Apache.Arrow/Types/StringType.cs index f2bb0822f8039..33620aad9e9c5 100644 --- a/csharp/src/Apache.Arrow/Types/StringType.cs +++ b/csharp/src/Apache.Arrow/Types/StringType.cs @@ -13,23 +13,16 @@ // See the License for the specific language governing permissions and // limitations under the License. -using System; -using System.Collections.Generic; -using System.Text; namespace Apache.Arrow.Types { - public class StringType: ArrowType + public sealed class StringType : ArrowType { public static StringType Default = new StringType(); public override ArrowTypeId TypeId => ArrowTypeId.String; public override string Name => "utf8"; - public override void Accept(IArrowTypeVisitor visitor) - { - if (visitor is IArrowTypeVisitor v) - v.Visit(this); - } + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); } } diff --git a/csharp/src/Apache.Arrow/Types/StructType.cs b/csharp/src/Apache.Arrow/Types/StructType.cs index f9b9e0ce1dfdf..fb074c101309d 100644 --- a/csharp/src/Apache.Arrow/Types/StructType.cs +++ b/csharp/src/Apache.Arrow/Types/StructType.cs @@ -19,7 +19,7 @@ namespace Apache.Arrow.Types { - public class StructType: ArrowType + public sealed class StructType : ArrowType { private readonly List _fields; @@ -55,10 +55,6 @@ public int GetFieldIndex(string name, field => comparer.Equals(field.Name, name)); } - public override void Accept(IArrowTypeVisitor visitor) - { - if (visitor is IArrowTypeVisitor v) - v.Visit(this); - } + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); } } diff --git a/csharp/src/Apache.Arrow/Types/Time32Type.cs b/csharp/src/Apache.Arrow/Types/Time32Type.cs index 70cfe509727e3..99c409babdb26 100644 --- a/csharp/src/Apache.Arrow/Types/Time32Type.cs +++ b/csharp/src/Apache.Arrow/Types/Time32Type.cs @@ -13,13 +13,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-using System; -using System.Collections.Generic; -using System.Text; namespace Apache.Arrow.Types { - public class Time32Type: TimeType + public sealed class Time32Type : TimeType { public static readonly Time32Type Default = new Time32Type(); @@ -30,10 +27,6 @@ public class Time32Type: TimeType public Time32Type(TimeUnit unit = TimeUnit.Millisecond) : base(unit) { } - public override void Accept(IArrowTypeVisitor visitor) - { - if (visitor is IArrowTypeVisitor v) - v.Visit(this); - } + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); } } diff --git a/csharp/src/Apache.Arrow/Types/Time64Type.cs b/csharp/src/Apache.Arrow/Types/Time64Type.cs index 3f727859c60b0..5d6c2e46e1b56 100644 --- a/csharp/src/Apache.Arrow/Types/Time64Type.cs +++ b/csharp/src/Apache.Arrow/Types/Time64Type.cs @@ -13,13 +13,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -using System; -using System.Collections.Generic; -using System.Text; namespace Apache.Arrow.Types { - public class Time64Type: TimeType + public sealed class Time64Type : TimeType { public static readonly Time64Type Default = new Time64Type(); @@ -30,10 +27,6 @@ public class Time64Type: TimeType public Time64Type(TimeUnit unit = TimeUnit.Millisecond) : base(unit) { } - public override void Accept(IArrowTypeVisitor visitor) - { - if (visitor is IArrowTypeVisitor v) - v.Visit(this); - } + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); } } diff --git a/csharp/src/Apache.Arrow/Types/TimeType.cs b/csharp/src/Apache.Arrow/Types/TimeType.cs index dba488b4ea706..9afa3fb62cdc7 100644 --- a/csharp/src/Apache.Arrow/Types/TimeType.cs +++ b/csharp/src/Apache.Arrow/Types/TimeType.cs @@ -13,9 +13,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -using System; -using System.Collections.Generic; -using System.Text; namespace Apache.Arrow.Types { diff --git a/csharp/src/Apache.Arrow/Types/TimestampType.cs b/csharp/src/Apache.Arrow/Types/TimestampType.cs index 22da8328dc527..4137818232c19 100644 --- a/csharp/src/Apache.Arrow/Types/TimestampType.cs +++ b/csharp/src/Apache.Arrow/Types/TimestampType.cs @@ -13,13 +13,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-using System; -using System.Collections.Generic; -using System.Text; namespace Apache.Arrow.Types { - public class TimestampType: FixedWidthType + public sealed class TimestampType : FixedWidthType { public static readonly TimestampType Default = new TimestampType(TimeUnit.Millisecond, "UTC"); @@ -38,10 +35,6 @@ public TimestampType( Timezone = timezone; } - public override void Accept(IArrowTypeVisitor visitor) - { - if (visitor is IArrowTypeVisitor v) - v.Visit(this); - } + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); } } diff --git a/csharp/src/Apache.Arrow/Types/UInt16Type.cs b/csharp/src/Apache.Arrow/Types/UInt16Type.cs index eb87729cf52cf..1925ffb86b790 100644 --- a/csharp/src/Apache.Arrow/Types/UInt16Type.cs +++ b/csharp/src/Apache.Arrow/Types/UInt16Type.cs @@ -15,7 +15,7 @@ namespace Apache.Arrow.Types { - public class UInt16Type : NumberType + public sealed class UInt16Type : NumberType { public static readonly UInt16Type Default = new UInt16Type(); @@ -24,10 +24,6 @@ public class UInt16Type : NumberType public override int BitWidth => 16; public override bool IsSigned => false; - public override void Accept(IArrowTypeVisitor visitor) - { - if (visitor is IArrowTypeVisitor v) - v.Visit(this); - } + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); } } \ No newline at end of file diff --git a/csharp/src/Apache.Arrow/Types/UInt32Type.cs b/csharp/src/Apache.Arrow/Types/UInt32Type.cs index e520004470885..8007025f30618 100644 --- a/csharp/src/Apache.Arrow/Types/UInt32Type.cs +++ b/csharp/src/Apache.Arrow/Types/UInt32Type.cs @@ -15,7 +15,7 @@ namespace Apache.Arrow.Types { - public class UInt32Type : NumberType + public sealed class UInt32Type : NumberType { public static readonly UInt32Type Default = new UInt32Type(); @@ -24,10 +24,6 @@ public class UInt32Type : NumberType public override int BitWidth => 32; public override bool IsSigned => false; - public override void Accept(IArrowTypeVisitor visitor) - { - if (visitor is IArrowTypeVisitor v) - v.Visit(this); - } + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); } } \ No newline at end of file diff --git a/csharp/src/Apache.Arrow/Types/UInt64Type.cs b/csharp/src/Apache.Arrow/Types/UInt64Type.cs index 45c6fac056833..20b51ad44f548 100644 --- a/csharp/src/Apache.Arrow/Types/UInt64Type.cs +++ b/csharp/src/Apache.Arrow/Types/UInt64Type.cs @@ -15,7 +15,7 @@ namespace Apache.Arrow.Types { - public class UInt64Type : NumberType + public sealed class UInt64Type : NumberType { public static readonly UInt64Type Default = new UInt64Type(); @@ -24,10 +24,6 @@ public class UInt64Type : NumberType public override int BitWidth => 64; public override bool IsSigned => false; - public override void Accept(IArrowTypeVisitor visitor) - { - if (visitor is IArrowTypeVisitor v) - v.Visit(this); - } + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); } } \ No newline at end of file diff --git a/csharp/src/Apache.Arrow/Types/UInt8Type.cs b/csharp/src/Apache.Arrow/Types/UInt8Type.cs index d63e42b2039be..e2e53657200ec 100644 --- a/csharp/src/Apache.Arrow/Types/UInt8Type.cs +++ b/csharp/src/Apache.Arrow/Types/UInt8Type.cs @@ -15,7 +15,7 @@ namespace Apache.Arrow.Types { - public class UInt8Type : NumberType + public sealed class UInt8Type : NumberType { public static readonly UInt8Type Default = new UInt8Type(); @@ -24,10 +24,6 @@ public class UInt8Type : NumberType public override int BitWidth => 8; public override bool IsSigned => 
false; - public override void Accept(IArrowTypeVisitor visitor) - { - if (visitor is IArrowTypeVisitor<UInt8Type> v) - v.Visit(this); - } + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); } } \ No newline at end of file diff --git a/csharp/src/Apache.Arrow/Types/UnionType.cs b/csharp/src/Apache.Arrow/Types/UnionType.cs index aadb1e7fbbe49..293271018aa26 100644 --- a/csharp/src/Apache.Arrow/Types/UnionType.cs +++ b/csharp/src/Apache.Arrow/Types/UnionType.cs @@ -24,7 +24,7 @@ public enum UnionMode Dense } - public class UnionType: ArrowType + public sealed class UnionType : ArrowType { public override ArrowTypeId TypeId => ArrowTypeId.Union; public override string Name => "union"; @@ -41,10 +41,6 @@ public UnionType( Mode = mode; } - public override void Accept(IArrowTypeVisitor visitor) - { - if (visitor is IArrowTypeVisitor<UnionType> v) - v.Visit(this); - } + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); } } diff --git a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj index dca8e2d819967..d29279b330a9b 100644 --- a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj +++ b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj @@ -6,6 +6,7 @@ <TargetFramework>netcoreapp2.1</TargetFramework> true + <LangVersion>7.3</LangVersion> diff --git a/csharp/test/Apache.Arrow.Tests/ArrowBufferBuilderTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowBufferBuilderTests.cs new file mode 100644 index 0000000000000..eee4d14b39469 --- /dev/null +++ b/csharp/test/Apache.Arrow.Tests/ArrowBufferBuilderTests.cs @@ -0,0 +1,176 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +using System; +using System.Collections.Generic; +using System.Linq; +using Xunit; + +namespace Apache.Arrow.Tests +{ + public class ArrowBufferBuilderTests + { + public class Append + { + + [Fact] + public void DoesNotThrowWithNullParameters() + { + var builder = new ArrowBuffer.Builder<int>(); + + builder.AppendRange(null); + builder.Append((Func>) null); + } + + [Fact] + public void CapacityOnlyGrowsWhenLengthWillExceedCapacity() + { + var builder = new ArrowBuffer.Builder<int>(1); + var capacity = builder.Capacity; + + builder.Append(1); + + Assert.Equal(capacity, builder.Capacity); + } + + [Fact] + public void CapacityGrowsAfterAppendWhenLengthExceedsCapacity() + { + var builder = new ArrowBuffer.Builder<int>(1); + var capacity = builder.Capacity; + + builder.Append(1); + builder.Append(2); + + Assert.True(builder.Capacity > capacity); + } + + [Fact] + public void CapacityGrowsAfterAppendSpan() + { + var builder = new ArrowBuffer.Builder<int>(1); + var capacity = builder.Capacity; + var data = Enumerable.Range(0, 10).Select(x => x).ToArray(); + + builder.Append(data); + + Assert.True(builder.Capacity > capacity); + } + + [Fact] + public void LengthIncrementsAfterAppend() + { + var builder = new ArrowBuffer.Builder<int>(1); + var length = builder.Length; + + builder.Append(1); + + Assert.Equal(length + 1, builder.Length); + } + + [Fact] + public void LengthGrowsBySpanLength() + { + var builder = new ArrowBuffer.Builder<int>(1); + var data = Enumerable.Range(0, 10).Select(x => x).ToArray(); + + builder.Append(data); + + Assert.Equal(10, builder.Length); + } + + [Fact] + public void BufferHasExpectedValues() + { + var builder = new ArrowBuffer.Builder<int>(1); + + builder.Append(10); + builder.Append(20); + + var buffer = builder.Build(); + var span = buffer.Span.CastTo<int>(); + + Assert.Equal(10, span[0]); + Assert.Equal(20, span[1]); + Assert.Equal(0, span[2]); + } + } + + public class AppendRange + { + [Fact] + public void CapacityGrowsAfterAppendEnumerable() + { + var builder = new ArrowBuffer.Builder<int>(1); + var capacity = builder.Capacity; + var data = Enumerable.Range(0, 10).Select(x => x); + + builder.AppendRange(data); + + Assert.True(builder.Capacity > capacity); + } + + [Fact] + public void LengthGrowsByEnumerableCount() + { + var builder = new ArrowBuffer.Builder<int>(1); + var length = builder.Length; + var data = Enumerable.Range(0, 10).Select(x => x).ToArray(); + var count = data.Length; + + builder.AppendRange(data); + + Assert.Equal(length + count, builder.Length); + } + + [Fact] + public void BufferHasExpectedValues() + { + var builder = new ArrowBuffer.Builder<int>(1); + var data = Enumerable.Range(0, 10).Select(x => x).ToArray(); + + builder.AppendRange(data); + + var buffer = builder.Build(); + var span = buffer.Span.CastTo<int>(); + + for (var i = 0; i < 10; i++) + { + Assert.Equal(i, span[i]); + } + } + } + + public class Clear + { + [Fact] + public void SetsAllValuesToDefault() + { + var builder = new ArrowBuffer.Builder<int>(1); + var data = Enumerable.Range(0, 10).Select(x => x).ToArray(); + + builder.AppendRange(data); + builder.Clear(); + + var buffer = builder.Build(); + var zeros = Enumerable.Range(0, 10).Select(x => 0).ToArray(); + var values = buffer.Span.CastTo<int>().Slice(0, 10).ToArray(); + + Assert.True(zeros.SequenceEqual(values)); + } + } + + } +} diff --git a/csharp/test/Apache.Arrow.Tests/ArrowBufferTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowBufferTests.cs index 28de056a61166..f618a9bcb65c4 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowBufferTests.cs +++
b/csharp/test/Apache.Arrow.Tests/ArrowBufferTests.cs @@ -13,10 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -using System; -using System.Runtime.CompilerServices; -using Apache.Arrow.Memory; using Apache.Arrow.Tests.Fixtures; +using System; using Xunit; namespace Apache.Arrow.Tests @@ -34,19 +32,20 @@ public Allocate(DefaultMemoryPoolFixture memoryPoolFixture) } /// <summary> - /// Ensure Arrow buffers are allocated in multiples of 8 bytes. + /// Ensure Arrow buffers are allocated in multiples of 64 bytes. /// </summary> /// <param name="size">number of bytes to allocate</param> /// <param name="expectedCapacity">expected buffer capacity after allocation</param> [Theory] - [InlineData(1, 8)] - [InlineData(8, 8)] - [InlineData(9, 16)] - [InlineData(16, 16)] + [InlineData(1, 64)] + [InlineData(8, 64)] + [InlineData(9, 64)] + [InlineData(65, 128)] public void AllocatesWithExpectedPadding(int size, int expectedCapacity) { - var buffer = ArrowBuffer.Allocate(size, _memoryPoolFixture.MemoryPool); - Assert.Equal(buffer.Capacity, expectedCapacity); + var buffer = new ArrowBuffer.Builder<byte>(size).Build(); + + Assert.Equal(buffer.Length, expectedCapacity); } /// <summary> @@ -59,12 +58,11 @@ public void AllocatesWithExpectedPadding(int size, int expectedCapacity) [InlineData(128)] public unsafe void AllocatesAlignedToMultipleOf64(int size) { - var buffer = ArrowBuffer.Allocate(size, _memoryPoolFixture.MemoryPool); + var buffer = new ArrowBuffer.Builder<byte>(size).Build(); - using (var pin = buffer.Memory.Pin()) - { - var ptr = new IntPtr(pin.Pointer); - Assert.True(ptr.ToInt64() % 64 == 0); + fixed (byte* ptr = &buffer.Span.GetPinnableReference()) + { + Assert.True(new IntPtr(ptr).ToInt64() % 64 == 0); } } @@ -74,10 +72,9 @@ public unsafe void AllocatesAlignedToMultipleOf64(int size) [Fact] public void HasZeroPadding() { - var buffer = ArrowBuffer.Allocate(32, _memoryPoolFixture.MemoryPool); - var span = buffer.GetSpan(); - - foreach (var b in span) + var buffer = new ArrowBuffer.Builder<byte>(10).Build(); + + foreach (var b in buffer.Span) { Assert.Equal(0, b); } diff --git a/csharp/test/Apache.Arrow.Tests/Fixtures/DefaultMemoryPoolFixture.cs b/csharp/test/Apache.Arrow.Tests/Fixtures/DefaultMemoryPoolFixture.cs index a87bfae1353fd..3b867cdb3d69b 100644 --- a/csharp/test/Apache.Arrow.Tests/Fixtures/DefaultMemoryPoolFixture.cs +++ b/csharp/test/Apache.Arrow.Tests/Fixtures/DefaultMemoryPoolFixture.cs @@ -23,10 +23,9 @@ public class DefaultMemoryPoolFixture public DefaultMemoryPoolFixture() { - const int padding = 8; const int alignment = 64; - MemoryPool = new NativeMemoryPool(padding, alignment); + MemoryPool = new NativeMemoryPool(alignment); } } } diff --git a/dev/README.md b/dev/README.md index 98aeef6d9a4d8..ead36d3747e76 100644 --- a/dev/README.md +++ b/dev/README.md @@ -28,17 +28,22 @@ https://gitbox.apache.org/setup/ to be able to push to GitHub as the main remote. * How to merge a Pull request: -have an apache and apache-github remote setup + ``` -git remote add apache-github https://github.com/apache/arrow.git git remote add apache git@github.com:apache/arrow.git ``` + run the following command + ``` dev/merge_arrow_pr.py ``` +This uses the GitHub REST API; if you encounter rate limit issues, you may set +an `ARROW_GITHUB_API_TOKEN` environment variable to use a Personal Access Token.
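For example, a hypothetical merge session (the credential and token values are placeholders; `JIRA_USERNAME` and `JIRA_PASSWORD` are the JIRA credentials the tool reads, as documented in the `dev/merge_arrow_pr.py` changes later in this patch, and the script itself prompts for the pull request number):

```
export JIRA_USERNAME=myjiraid
export JIRA_PASSWORD=myjirapassword
export ARROW_GITHUB_API_TOKEN=mygithubtoken
dev/merge_arrow_pr.py
```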
+ Note: + * The directory name of your Arrow git clone must be called arrow * Without jira-python installed you'll have to close the JIRA manually diff --git a/dev/dask_integration/Dockerfile b/dev/dask_integration/Dockerfile deleted file mode 100644 index f0c1f03f6f93c..0000000000000 --- a/dev/dask_integration/Dockerfile +++ /dev/null @@ -1,22 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -FROM arrow_integration_xenial_base - -ADD . /apache-arrow -WORKDIR /apache-arrow - -CMD arrow/dev/dask_integration/dask_integration.sh diff --git a/dev/dask_integration/dask_integration.sh b/dev/dask_integration/dask_integration.sh deleted file mode 100755 index f4999c0ae447f..0000000000000 --- a/dev/dask_integration/dask_integration.sh +++ /dev/null @@ -1,98 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -# Set up environment and working directory -cd /apache-arrow - -conda activate pyarrow-dev - -# install pytables from defaults for now -conda install -y pytables - -pip install -q git+https://github.com/dask/partd --upgrade --no-deps -pip install -q git+https://github.com/dask/zict --upgrade --no-deps -pip install -q git+https://github.com/dask/distributed --upgrade --no-deps -pip install -q git+https://github.com/mrocklin/sparse --upgrade --no-deps -pip install -q git+https://github.com/dask/s3fs --upgrade --no-deps - -conda install -y -q -c conda-forge numba cython \ - bcolz \ - blosc \ - bokeh \ - boto3 \ - chest \ - cloudpickle \ - coverage \ - cytoolz \ - distributed \ - graphviz \ - h5py \ - partd \ - psutil \ - "pytest<=3.1.1" \ - scikit-image \ - scikit-learn \ - sqlalchemy \ - toolz - -pip install -q git+https://github.com/dask/fastparquet - -pip install -q \ - cachey \ - graphviz \ - moto \ - pyarrow \ - --upgrade --no-deps - -pip install -q \ - cityhash \ - flake8 \ - mmh3 \ - pandas_datareader \ - pytest-xdist \ - xxhash \ - pycodestyle - -export ARROW_BUILD_TYPE=release -export ARROW_HOME=$(pwd)/dist -export PARQUET_HOME=$(pwd)/dist -CONDA_BASE=/home/ubuntu/miniconda -export LD_LIBRARY_PATH=$(pwd)/dist/lib:${CONDA_BASE}/lib:${LD_LIBRARY_PATH} - -# Allow for --user Python installation inside Docker -export HOME=$(pwd) - -# Clean up and get the dask master branch from github -rm -rf dask .local -export GIT_COMMITTER_NAME="Nobody" -export GIT_COMMITTER_EMAIL="nobody@nowhere.com" -git clone https://github.com/dask/dask.git -pushd dask -pip install --user -e .[complete] -# Verify integrity of the installed dask dataframe code -py.test dask/dataframe/tests/test_dataframe.py -popd - -# Run the integration test -pushd arrow/python/testing -py.test dask_tests -popd - -pushd dask/dask/dataframe/io -py.test tests/test_parquet.py -popd diff --git a/dev/docker-compose.yml b/dev/docker-compose.yml index a11b4015c5dd3..19fda7823ac25 100644 --- a/dev/docker-compose.yml +++ b/dev/docker-compose.yml @@ -31,18 +31,6 @@ services: volumes: - ../..:/apache-arrow - spark_integration: - build: - context: spark_integration - volumes: - - ../..:/apache-arrow - - dask_integration: - build: - context: dask_integration - volumes: - - ../..:/apache-arrow - gen_apidocs: build: context: .. 
@@ -50,12 +38,6 @@ services: volumes: - ..:/arrow - iwyu: - build: - context: iwyu - volumes: - - ../..:/apache-arrow - run_site: build: context: run_site diff --git a/dev/gen_apidocs/create_documents.sh b/dev/gen_apidocs/create_documents.sh index 6a3b06578829a..ee8f8c864d225 100755 --- a/dev/gen_apidocs/create_documents.sh +++ b/dev/gen_apidocs/create_documents.sh @@ -87,15 +87,6 @@ rsync -r doc/parquet-glib/html/ ../../site/asf-site/docs/c_glib/parquet-glib popd popd -# Now Python documentation can be built -pushd arrow/python -python setup.py build_ext --build-type=$ARROW_BUILD_TYPE \ - --with-plasma --with-parquet --inplace -python setup.py build_sphinx -s doc/source -mkdir -p ../site/asf-site/docs/python -rsync -r doc/_build/html/ ../site/asf-site/docs/python -popd - # Make C++ documentation pushd arrow/cpp/apidoc rm -rf html/* diff --git a/dev/lint/Dockerfile b/dev/lint/Dockerfile index 71d7ec85a8e2e..c7901e1f90e32 100644 --- a/dev/lint/Dockerfile +++ b/dev/lint/Dockerfile @@ -24,7 +24,7 @@ RUN apt-get install -y -q \ clang-tidy \ iwyu -RUN conda install -c conda-forge flake8 && \ +RUN conda install flake8 && \ conda clean --all -y # https://bugs.launchpad.net/ubuntu/+source/iwyu/+bug/1769334 diff --git a/dev/merge_arrow_pr.py b/dev/merge_arrow_pr.py index 8539d5d3401fd..5a926f5f6d17a 100755 --- a/dev/merge_arrow_pr.py +++ b/dev/merge_arrow_pr.py @@ -24,8 +24,17 @@ # This utility assumes you already have a local Arrow git clone and that you # have added remotes corresponding to both (i) the Github Apache Arrow mirror # and (ii) the apache git repo. +# +# There are several pieces of authorization possibly needed via environment +# variables +# +# JIRA_USERNAME: your Apache JIRA id +# JIRA_PASSWORD: your Apache JIRA password +# ARROW_GITHUB_API_TOKEN: a GitHub API token to use for API requests (to avoid +# rate limiting) import os +import pprint import re import subprocess import sys @@ -38,8 +47,8 @@ try: import jira.client except ImportError: - print("Could not find jira-python library. " - "Run 'sudo pip install jira-python' to install.") + print("Could not find jira library. 
" + "Run 'sudo pip install jira' to install.") print("Exiting without trying to close the associated JIRA.") sys.exit(1) @@ -48,12 +57,10 @@ BRANCH_PREFIX = "PR_TOOL" JIRA_API_BASE = "https://issues.apache.org/jira" - -def get_json(url): - req = requests.get(url) +def get_json(url, headers=None): + req = requests.get(url, headers=headers) return req.json() - def run_cmd(cmd): if isinstance(cmd, six.string_types): cmd = cmd.split(' ') @@ -192,8 +199,15 @@ def __init__(self, project_name): self.github_api = ("https://api.github.com/repos/apache/{0}" .format(project_name)) + token = os.environ.get('ARROW_GITHUB_API_TOKEN', None) + if token: + self.headers = {'Authorization': 'token {0}'.format(token)} + else: + self.headers = None + def get_pr_data(self, number): - return get_json("%s/pulls/%s" % (self.github_api, number)) + return get_json("%s/pulls/%s" % (self.github_api, number), + headers=self.headers) class CommandInput(object): @@ -225,13 +239,16 @@ def __init__(self, cmd, github_api, git_remote, jira_con, number): self.con = jira_con self.number = number self._pr_data = github_api.get_pr_data(number) - self.url = self._pr_data["url"] - self.title = self._pr_data["title"] - - self.body = self._pr_data["body"] - self.target_ref = self._pr_data["base"]["ref"] - self.user_login = self._pr_data["user"]["login"] - self.base_ref = self._pr_data["head"]["ref"] + try: + self.url = self._pr_data["url"] + self.title = self._pr_data["title"] + self.body = self._pr_data["body"] + self.target_ref = self._pr_data["base"]["ref"] + self.user_login = self._pr_data["user"]["login"] + self.base_ref = self._pr_data["head"]["ref"] + except KeyError: + pprint.pprint(self._pr_data) + raise self.description = "%s/%s" % (self.user_login, self.base_ref) self.jira_issue = self._get_jira() @@ -435,4 +452,4 @@ def get_version_json(version_str): try: cli() except Exception as e: - print(e.args[0]) + raise diff --git a/dev/release/00-prepare.sh b/dev/release/00-prepare.sh index 9282cbfd2771d..bfcfc83825499 100755 --- a/dev/release/00-prepare.sh +++ b/dev/release/00-prepare.sh @@ -21,10 +21,107 @@ set -e SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +update_versions() { + local base_version=$1 + local next_version=$2 + local type=$3 + + case ${type} in + release) + local version=${base_version} + local r_version=${base_version} + ;; + snapshot) + local version=${next_version}-SNAPSHOT + local r_version=${base_version}.9000 + ;; + esac + + cd "${SOURCE_DIR}/../../cpp" + sed -i.bak -E -e \ + "s/^set\(ARROW_VERSION \".+\"\)/set(ARROW_VERSION \"${version}\")/" \ + CMakeLists.txt + rm -f CMakeLists.txt.bak + git add CMakeLists.txt + cd - + + cd "${SOURCE_DIR}/../../c_glib" + sed -i.bak -E -e \ + "s/^m4_define\(\[arrow_glib_version\], .+\)/m4_define([arrow_glib_version], ${version})/" \ + configure.ac + sed -i.bak -E -e \ + "s/^version = '.+'/version = '${version}'/" \ + meson.build + rm -f configure.ac.bak meson.build.bak + git add configure.ac meson.build + cd - + + # We can enable this when Arrow JS uses the same version. 
+ # cd "${SOURCE_DIR}/../../js" + # sed -i.bak -E -e \ + # "s/^ \"version\": \".+\"/ \"version\": \"${version}\"/" \ + # package.json + # rm -f package.json + # git add package.json + # cd - + + cd "${SOURCE_DIR}/../../matlab" + sed -i.bak -E -e \ + "s/^set\(MLARROW_VERSION \".+\"\)/set(MLARROW_VERSION \"${version}\")/" \ + CMakeLists.txt + rm -f CMakeLists.txt.bak + git add CMakeLists.txt + cd - + + cd "${SOURCE_DIR}/../../python" + sed -i.bak -E -e \ + "s/^default_version = '.+'/default_version = '${version}'/" \ + setup.py + rm -f setup.py.bak + git add setup.py + cd - + + cd "${SOURCE_DIR}/../../r" + sed -i.bak -E -e \ + "s/^Version: .+/Version: ${r_version}/" \ + DESCRIPTION + rm -f DESCRIPTION.bak + git add DESCRIPTION + cd - + + cd "${SOURCE_DIR}/../../ruby" + sed -i.bak -E -e \ + "s/^ VERSION = \".+\"/ VERSION = \"${version}\"/g" \ + */*/*/version.rb + rm -f */*/*/version.rb.bak + git add */*/*/version.rb + cd - + + cd "${SOURCE_DIR}/../../rust" + sed -i.bak -E -e \ + "s/^version = \".+\"/version = \"${version}\"/g" \ + arrow/Cargo.toml parquet/Cargo.toml + rm -f arrow/Cargo.toml.bak parquet/Cargo.toml.bak + git add arrow/Cargo.toml parquet/Cargo.toml + + # Update version number for parquet README + sed -i.bak -E -e \ + "s/^parquet = \".+\"/parquet = \"${version}\"/g" \ + parquet/README.md + sed -i.bak -E -e \ + "s/docs.rs\/crate\/parquet\/.+\)/docs.rs\/crate\/parquet\/${version}\)/g" \ + parquet/README.md + rm -f parquet/README.md.bak + git add parquet/README.md + cd - +} + if [ "$#" -eq 2 ]; then + ############################## Pre-Tag Commits ############################## + version=$1 - nextVersion=$2 - nextVersionSNAPSHOT=${nextVersion}-SNAPSHOT + next_version=$2 + next_version_snapshot=${next_version}-SNAPSHOT tag=apache-arrow-${version} echo "Updating changelog for $version" @@ -41,44 +138,51 @@ if [ "$#" -eq 2 ]; then git commit -m "[Release] Update .deb/.rpm changelogs for $version" cd - - echo "prepare release ${version} on tag ${tag} then reset to version ${nextVersionSNAPSHOT}" + echo "prepare release ${version} on tag ${tag} then reset to version ${next_version_snapshot}" - cd "${SOURCE_DIR}/../../java" + update_versions "${version}" "${next_version}" "release" + git commit -m "[Release] Update versions for ${version}" + cd "${SOURCE_DIR}/../../java" mvn release:clean - mvn release:prepare -Dtag=${tag} -DreleaseVersion=${version} -DautoVersionSubmodules -DdevelopmentVersion=${nextVersionSNAPSHOT} - + mvn release:prepare -Dtag=${tag} -DreleaseVersion=${version} -DautoVersionSubmodules -DdevelopmentVersion=${next_version_snapshot} cd - - echo "Updating .deb package names for $nextVersion" - deb_lib_suffix=$(echo $version | sed -r -e 's/^[0-9]+\.([0-9]+)\.[0-9]+$/\1/') - next_deb_lib_suffix=$(echo $nextVersion | sed -r -e 's/^[0-9]+\.([0-9]+)\.[0-9]+$/\1/') + ############################## Post-Tag Commits ############################# + + echo "Updating versions for ${next_version_snapshot}" + update_versions "${version}" "${next_version}" "snapshot" + git commit -m "[Release] Update versions for ${next_version_snapshot}" + + echo "Updating .deb package names for ${next_version}" + deb_lib_suffix=$(echo $version | sed -E -e 's/^[0-9]+\.([0-9]+)\.[0-9]+$/\1/') + next_deb_lib_suffix=$(echo $next_version | sed -E -e 's/^[0-9]+\.([0-9]+)\.[0-9]+$/\1/') cd $SOURCE_DIR/../tasks/linux-packages/ for target in debian*/lib*${deb_lib_suffix}.install; do git mv \ ${target} \ $(echo $target | sed -e "s/${deb_lib_suffix}/${next_deb_lib_suffix}/") done - 
deb_lib_suffix_substitute_pattern="s/(lib(arrow|parquet)[-a-z]*)${deb_lib_suffix}/\\1${next_deb_lib_suffix}/g" - sed -i.bak -r -e "${deb_lib_suffix_substitute_pattern}" debian*/control + deb_lib_suffix_substitute_pattern="s/(lib(arrow|gandiva|parquet|plasma)[-a-z]*)${deb_lib_suffix}/\\1${next_deb_lib_suffix}/g" + sed -i.bak -E -e "${deb_lib_suffix_substitute_pattern}" debian*/control rm -f debian*/control.bak git add debian*/control cd - cd $SOURCE_DIR/../tasks/ - sed -i.bak -r -e "${deb_lib_suffix_substitute_pattern}" tasks.yml + sed -i.bak -E -e "${deb_lib_suffix_substitute_pattern}" tasks.yml rm -f tasks.yml.bak git add tasks.yml cd - cd $SOURCE_DIR - sed -i.bak -r -e "${deb_lib_suffix_substitute_pattern}" rat_exclude_files.txt + sed -i.bak -E -e "${deb_lib_suffix_substitute_pattern}" rat_exclude_files.txt rm -f rat_exclude_files.txt.bak git add rat_exclude_files.txt - git commit -m "[Release] Update .deb package names for $nextVersion" + git commit -m "[Release] Update .deb package names for $next_version" cd - echo "Finish staging binary artifacts by running: sh dev/release/01-perform.sh" else - echo "Usage: $0 <version> <nextVersion>" + echo "Usage: $0 <version> <next_version>" exit fi diff --git a/dev/release/02-source.sh b/dev/release/02-source.sh index e224584223b4c..85dee3302e917 100755 --- a/dev/release/02-source.sh +++ b/dev/release/02-source.sh @@ -45,46 +45,28 @@ echo "Using commit $release_hash" tarball=${tag}.tar.gz -extract_dir=tmp-apache-arrow -rm -rf ${extract_dir} +archive_name=tmp-apache-arrow # be conservative and use the release hash, even though git produces the same # archive (identical hashes) using the scm tag -git archive ${release_hash} --prefix ${extract_dir}/ | tar xf - - -# build Apache Arrow C++ before building Apache Arrow GLib because -# Apache Arrow GLib requires Apache Arrow C++. -mkdir -p ${extract_dir}/cpp/build -cpp_install_dir=${PWD}/${extract_dir}/cpp/install -cd ${extract_dir}/cpp/build -cmake .. \ - -DCMAKE_INSTALL_PREFIX=${cpp_install_dir} \ - -DCMAKE_INSTALL_LIBDIR=${cpp_install_dir}/lib \ - -DARROW_BUILD_TESTS=no \ - -DARROW_PARQUET=yes -make -j8 -make install -cd - - -# build source archive for Apache Arrow GLib by "make dist".
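# Illustration (hypothetical version numbers, not part of the patch): with the
# usage string above, preparing the 0.12.0 release and opening 0.13.0
# development would be:
#
#   dev/release/00-prepare.sh 0.12.0 0.13.0
#
# update_versions() then runs twice: once with type "release" (writing plain
# 0.12.0) and, after the Java release plugin tags, once with type "snapshot"
# (writing 0.13.0-SNAPSHOT, and 0.12.0.9000 into the R DESCRIPTION).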
-cd ${extract_dir}/c_glib -./autogen.sh -./configure \ - PKG_CONFIG_PATH=$cpp_install_dir/lib/pkgconfig \ - --enable-gtk-doc -LD_LIBRARY_PATH=$cpp_install_dir/lib:$LD_LIBRARY_PATH make -j8 -make dist -tar xzf *.tar.gz -rm *.tar.gz -cd - -rm -rf tmp-c_glib/ -mv ${extract_dir}/c_glib/apache-arrow-glib-* tmp-c_glib/ -rm -rf ${extract_dir} +git archive ${release_hash} --prefix ${archive_name}/ > ${archive_name}.tar.gz + +dist_c_glib_tar_gz=c_glib.tar.gz +docker_image_name=apache-arrow/release-source +DEBUG=yes docker build -t ${docker_image_name} ${SOURCE_DIR}/source +docker \ + run \ + --rm \ + --interactive \ + --volume "$PWD":/host \ + ${docker_image_name} \ + /build.sh ${archive_name} ${dist_c_glib_tar_gz} # replace c_glib/ by tar.gz generated by "make dist" rm -rf ${tag} git archive $release_hash --prefix ${tag}/ | tar xf - rm -rf ${tag}/c_glib -mv tmp-c_glib ${tag}/c_glib +tar xf ${dist_c_glib_tar_gz} -C ${tag} +rm -f ${dist_c_glib_tar_gz} # Create new tarball from modified source directory tar czhf ${tarball} ${tag} diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index 13918d55fca87..4866ec2aa3c30 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -13,18 +13,10 @@ cpp/src/arrow/io/mman.h cpp/src/arrow/util/random.h cpp/src/arrow/status.cc cpp/src/arrow/status.h -cpp/src/arrow/util/string_view/string_view.hpp -cpp/src/arrow/util/variant.h -cpp/src/arrow/util/variant/optional.h -cpp/src/arrow/util/variant/recursive_wrapper.h -cpp/src/arrow/util/variant/variant_cast.h -cpp/src/arrow/util/variant/variant_io.h -cpp/src/arrow/util/variant/variant_visitor.h -cpp/src/arrow/util/xxhash/xxhash.c -cpp/src/arrow/util/xxhash/xxhash.h +cpp/src/arrow/vendored/* cpp/build-support/asan_symbolize.py cpp/build-support/cpplint.py -cpp/build-support/clang_format_exclusions.txt +cpp/build-support/lint_exclusions.txt cpp/build-support/iwyu/* cpp/cmake_modules/BuildUtils.cmake cpp/cmake_modules/FindPythonLibsNew.cmake @@ -48,54 +40,82 @@ dev/tasks/linux-packages/debian.ubuntu-trusty/compat dev/tasks/linux-packages/debian.ubuntu-trusty/control dev/tasks/linux-packages/debian.ubuntu-trusty/gir1.2-arrow-1.0.install dev/tasks/linux-packages/debian.ubuntu-trusty/gir1.2-parquet-1.0.install +dev/tasks/linux-packages/debian.ubuntu-trusty/gir1.2-plasma-1.0.install dev/tasks/linux-packages/debian.ubuntu-trusty/libarrow-dev.install dev/tasks/linux-packages/debian.ubuntu-trusty/libarrow-glib-dev.install dev/tasks/linux-packages/debian.ubuntu-trusty/libarrow-glib-doc.doc-base dev/tasks/linux-packages/debian.ubuntu-trusty/libarrow-glib-doc.install dev/tasks/linux-packages/debian.ubuntu-trusty/libarrow-glib-doc.links -dev/tasks/linux-packages/debian.ubuntu-trusty/libarrow-glib12.install -dev/tasks/linux-packages/debian.ubuntu-trusty/libarrow-python12.install -dev/tasks/linux-packages/debian.ubuntu-trusty/libarrow12.install +dev/tasks/linux-packages/debian.ubuntu-trusty/libarrow-glib13.install +dev/tasks/linux-packages/debian.ubuntu-trusty/libarrow-python13.install +dev/tasks/linux-packages/debian.ubuntu-trusty/libarrow13.install dev/tasks/linux-packages/debian.ubuntu-trusty/libparquet-dev.install dev/tasks/linux-packages/debian.ubuntu-trusty/libparquet-glib-dev.install dev/tasks/linux-packages/debian.ubuntu-trusty/libparquet-glib-doc.doc-base dev/tasks/linux-packages/debian.ubuntu-trusty/libparquet-glib-doc.install dev/tasks/linux-packages/debian.ubuntu-trusty/libparquet-glib-doc.links 
-dev/tasks/linux-packages/debian.ubuntu-trusty/libparquet-glib12.install -dev/tasks/linux-packages/debian.ubuntu-trusty/libparquet12.install +dev/tasks/linux-packages/debian.ubuntu-trusty/libparquet-glib13.install +dev/tasks/linux-packages/debian.ubuntu-trusty/libparquet13.install +dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma-dev.install +dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma-glib-dev.install +dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma-glib-doc.doc-base +dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma-glib-doc.install +dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma-glib-doc.links +dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma-glib13.install +dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma13.install dev/tasks/linux-packages/debian.ubuntu-trusty/patches/series +dev/tasks/linux-packages/debian.ubuntu-trusty/plasma-store-server.install dev/tasks/linux-packages/debian.ubuntu-trusty/rules dev/tasks/linux-packages/debian.ubuntu-trusty/source/format dev/tasks/linux-packages/debian.ubuntu-trusty/watch dev/tasks/linux-packages/debian/compat dev/tasks/linux-packages/debian/control dev/tasks/linux-packages/debian/gir1.2-arrow-1.0.install -dev/tasks/linux-packages/debian/gir1.2-arrow-gpu-1.0.install +dev/tasks/linux-packages/debian/gir1.2-arrow-cuda-1.0.install +dev/tasks/linux-packages/debian/gir1.2-gandiva-1.0.install dev/tasks/linux-packages/debian/gir1.2-parquet-1.0.install +dev/tasks/linux-packages/debian/gir1.2-plasma-1.0.install dev/tasks/linux-packages/debian/libarrow-dev.install dev/tasks/linux-packages/debian/libarrow-glib-dev.install dev/tasks/linux-packages/debian/libarrow-glib-doc.doc-base dev/tasks/linux-packages/debian/libarrow-glib-doc.install dev/tasks/linux-packages/debian/libarrow-glib-doc.links -dev/tasks/linux-packages/debian/libarrow-glib12.install -dev/tasks/linux-packages/debian/libarrow-gpu-dev.install -dev/tasks/linux-packages/debian/libarrow-gpu-glib-dev.install -dev/tasks/linux-packages/debian/libarrow-gpu-glib12.install -dev/tasks/linux-packages/debian/libarrow-gpu12.install +dev/tasks/linux-packages/debian/libarrow-glib13.install +dev/tasks/linux-packages/debian/libarrow-cuda-dev.install +dev/tasks/linux-packages/debian/libarrow-cuda-glib-dev.install +dev/tasks/linux-packages/debian/libarrow-cuda-glib13.install +dev/tasks/linux-packages/debian/libarrow-cuda13.install dev/tasks/linux-packages/debian/libarrow-python-dev.install -dev/tasks/linux-packages/debian/libarrow-python12.install -dev/tasks/linux-packages/debian/libarrow12.install +dev/tasks/linux-packages/debian/libarrow-python13.install +dev/tasks/linux-packages/debian/libarrow13.install +dev/tasks/linux-packages/debian/libgandiva-dev.install +dev/tasks/linux-packages/debian/libgandiva-glib-dev.install +dev/tasks/linux-packages/debian/libgandiva-glib-doc.doc-base +dev/tasks/linux-packages/debian/libgandiva-glib-doc.install +dev/tasks/linux-packages/debian/libgandiva-glib-doc.links +dev/tasks/linux-packages/debian/libgandiva-glib13.install +dev/tasks/linux-packages/debian/libgandiva13.install dev/tasks/linux-packages/debian/libparquet-dev.install dev/tasks/linux-packages/debian/libparquet-glib-dev.install dev/tasks/linux-packages/debian/libparquet-glib-doc.doc-base dev/tasks/linux-packages/debian/libparquet-glib-doc.install dev/tasks/linux-packages/debian/libparquet-glib-doc.links -dev/tasks/linux-packages/debian/libparquet-glib12.install -dev/tasks/linux-packages/debian/libparquet12.install 
+dev/tasks/linux-packages/debian/libparquet-glib13.install +dev/tasks/linux-packages/debian/libparquet13.install +dev/tasks/linux-packages/debian/libplasma-dev.install +dev/tasks/linux-packages/debian/libplasma-glib-dev.install +dev/tasks/linux-packages/debian/libplasma-glib-doc.doc-base +dev/tasks/linux-packages/debian/libplasma-glib-doc.install +dev/tasks/linux-packages/debian/libplasma-glib-doc.links +dev/tasks/linux-packages/debian/libplasma-glib13.install +dev/tasks/linux-packages/debian/libplasma13.install dev/tasks/linux-packages/debian/patches/series +dev/tasks/linux-packages/debian/plasma-store-server.install dev/tasks/linux-packages/debian/rules dev/tasks/linux-packages/debian/source/format dev/tasks/linux-packages/debian/watch +dev/tasks/conda-recipes/variants/*.yaml +docs/requirements.txt go/arrow/go.sum go/arrow/Gopkg.lock go/arrow/internal/cpu/* @@ -106,11 +126,12 @@ js/.npmignore js/closure-compiler-scripts/* python/cmake_modules python/cmake_modules/* -python/doc/requirements.txt python/MANIFEST.in python/pyarrow/includes/__init__.pxd python/pyarrow/tests/__init__.py python/requirements.txt +python/requirements-test.txt +python/requirements-wheel.txt pax_global_header MANIFEST.in __init__.pxd @@ -130,9 +151,15 @@ c_glib/config/ltmain.sh c_glib/doc/arrow-glib/arrow-glib.types c_glib/doc/arrow-glib/arrow-glib-sections.txt c_glib/doc/arrow-glib/arrow-glib-overrides.txt +c_glib/doc/gandiva-glib/gandiva-glib.types +c_glib/doc/gandiva-glib/gandiva-glib-sections.txt +c_glib/doc/gandiva-glib/gandiva-glib-overrides.txt c_glib/doc/parquet-glib/parquet-glib.types c_glib/doc/parquet-glib/parquet-glib-sections.txt c_glib/doc/parquet-glib/parquet-glib-overrides.txt +c_glib/doc/plasma-glib/plasma-glib.types +c_glib/doc/plasma-glib/plasma-glib-sections.txt +c_glib/doc/plasma-glib/plasma-glib-overrides.txt c_glib/gtk-doc.make csharp/.gitattributes csharp/src/Apache.Arrow/Flatbuf/* @@ -160,4 +187,6 @@ r/README.md r/README.Rmd r/man/*.Rd .gitattributes -rust/test/data/*.csv \ No newline at end of file +ruby/red-arrow/.yardopts +rust/arrow/test/data/*.csv +rust/rust-toolchain diff --git a/dev/release/run-rat.sh b/dev/release/run-rat.sh index 53a322a969718..587e93af4622d 100755 --- a/dev/release/run-rat.sh +++ b/dev/release/run-rat.sh @@ -18,10 +18,14 @@ # under the License. # +RAT_VERSION=0.12 + # download apache rat -curl -s https://repo1.maven.org/maven2/org/apache/rat/apache-rat/0.12/apache-rat-0.12.jar > apache-rat-0.12.jar +if [ ! -f apache-rat-${RAT_VERSION}.jar ]; then + curl -s https://repo1.maven.org/maven2/org/apache/rat/apache-rat/${RAT_VERSION}/apache-rat-${RAT_VERSION}.jar > apache-rat-${RAT_VERSION}.jar +fi -RAT="java -jar apache-rat-0.12.jar -x " +RAT="java -jar apache-rat-${RAT_VERSION}.jar -x " RELEASE_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd) diff --git a/dev/release/source/Dockerfile b/dev/release/source/Dockerfile new file mode 100644 index 0000000000000..70ed8aa866dd0 --- /dev/null +++ b/dev/release/source/Dockerfile @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +FROM ubuntu:18.04 + +ENV DEBIAN_FRONTEND noninteractive + +RUN \ + apt update && \ + apt install -y -V \ + autoconf-archive \ + bison \ + clang-6.0 \ + cmake \ + flex \ + g++ \ + gcc \ + gtk-doc-tools \ + libboost-filesystem-dev \ + libboost-regex-dev \ + libboost-system-dev \ + libgirepository1.0-dev \ + libglib2.0-doc \ + libprotobuf-dev \ + libprotoc-dev \ + libtool \ + lsb-release \ + make \ + pkg-config \ + protobuf-compiler && \ + apt clean && \ + rm -rf /var/lib/apt/lists/* + +COPY build.sh /build.sh diff --git a/dev/release/source/build.sh b/dev/release/source/build.sh new file mode 100755 index 0000000000000..25775fdc3e813 --- /dev/null +++ b/dev/release/source/build.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -e + +archive_name=$1 +dist_c_glib_tar_gz=$2 + +tar xf /host/${archive_name}.tar.gz + +# build Apache Arrow C++ before building Apache Arrow GLib because +# Apache Arrow GLib requires Apache Arrow C++. +mkdir -p ${archive_name}/cpp/build +cpp_install_dir=${PWD}/${archive_name}/cpp/install +cd ${archive_name}/cpp/build +cmake .. \ + -DCMAKE_INSTALL_PREFIX=${cpp_install_dir} \ + -DCMAKE_INSTALL_LIBDIR=lib \ + -DARROW_PLASMA=yes \ + -DARROW_GANDIVA=yes \ + -DARROW_PARQUET=yes +make -j8 +make install +cd - + +# build source archive for Apache Arrow GLib by "make dist". 
+cd ${archive_name}/c_glib +./autogen.sh +./configure \ + PKG_CONFIG_PATH=${cpp_install_dir}/lib/pkgconfig \ + --enable-gtk-doc +LD_LIBRARY_PATH=${cpp_install_dir}/lib make -j8 +make dist +tar xzf *.tar.gz +rm *.tar.gz +cd - +mv ${archive_name}/c_glib/apache-arrow-glib-* c_glib/ +tar czf /host/${dist_c_glib_tar_gz} c_glib diff --git a/dev/release/verify-release-candidate.bat b/dev/release/verify-release-candidate.bat index cc25b045dce47..f5f9e964231b6 100644 --- a/dev/release/verify-release-candidate.bat +++ b/dev/release/verify-release-candidate.bat @@ -46,12 +46,11 @@ call conda create -p %_VERIFICATION_CONDA_ENV% -f -q -y python=%PYTHON% || exit call activate %_VERIFICATION_CONDA_ENV% || exit /B call conda install -y ^ - six pytest setuptools numpy pandas cython ^ - thrift-cpp flatbuffers rapidjson ^ - cmake ^ - git ^ - boost-cpp ^ - snappy zlib brotli gflags lz4-c zstd -c conda-forge || exit /B + python=3.7 ^ + git ^ + --file=ci\conda_env_cpp.yml ^ + --file=ci\conda_env_python.yml ^ + -c conda-forge || exit /B set GENERATOR=Visual Studio 14 2015 Win64 set CONFIGURATION=release @@ -74,6 +73,7 @@ call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\Tool cmake -G "%GENERATOR%" ^ -DCMAKE_INSTALL_PREFIX=%ARROW_HOME% ^ -DARROW_BOOST_USE_SHARED=ON ^ + -DARROW_BUILD_TESTS=ON ^ -DCMAKE_BUILD_TYPE=%CONFIGURATION% ^ -DARROW_CXXFLAGS="/MP" ^ -DARROW_PYTHON=ON ^ diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 5b666630d17a0..3694c867dcb20 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -51,10 +51,10 @@ HERE=$(cd `dirname "${BASH_SOURCE[0]:-$0}"` && pwd) ARROW_DIST_URL='https://dist.apache.org/repos/dist/dev/arrow' -: ${ARROW_HAVE_GPU:=} -if [ -z "$ARROW_HAVE_GPU" ]; then +: ${ARROW_HAVE_CUDA:=} +if [ -z "$ARROW_HAVE_CUDA" ]; then if nvidia-smi --list-gpus 2>&1 > /dev/null; then - ARROW_HAVE_GPU=yes + ARROW_HAVE_CUDA=yes fi fi @@ -87,24 +87,51 @@ fetch_archive() { shasum -a 512 -c ${dist_name}.tar.gz.sha512 } +bintray() { + local command=$1 + shift + local path=$1 + shift + local url=https://bintray.com/api/v1${path} + echo "${command} ${url}" 1>&2 + curl \ + --fail \ + --request ${command} \ + ${url} \ + "$@" | \ + jq . +} + +download_bintray_files() { + local target=$1 + + local version_name=${VERSION}-rc${RC_NUMBER} + + bintray \ + GET /packages/${BINTRAY_REPOSITORY}/${target}-rc/versions/${version_name}/files | \ + jq -r ".[].path" | \ + while read file; do + mkdir -p "$(dirname ${file})" + curl \ + --fail \ + --location \ + --output ${file} \ + https://dl.bintray.com/${BINTRAY_REPOSITORY}/${file} + done +} + verify_binary_artifacts() { - # --show-progress not supported on wget < 1.16 - wget --help | grep -q '\--show-progress' && \ - _WGET_PROGRESS_OPT="-q --show-progress" || _WGET_PROGRESS_OPT="" - - # download the binaries folder for the current RC - rcname=apache-arrow-${VERSION}-rc${RC_NUMBER} - wget -P "$rcname" \ - --quiet \ - --no-host-directories \ - --cut-dirs=5 \ - $_WGET_PROGRESS_OPT \ - --no-parent \ - --reject 'index.html*' \ - --recursive "$ARROW_DIST_URL/$rcname/binaries/" + local download_dir=binaries + mkdir -p ${download_dir} + pushd ${download_dir} + + # takes longer on slow network + for target in centos debian python ubuntu; do + download_bintray_files ${target} + done # verify the signature and the checksums of each artifact - find $rcname/binaries -name '*.asc' | while read sigfile; do + find . 
-name '*.asc' | while read sigfile; do artifact=${sigfile/.asc/} gpg --verify $sigfile $artifact || exit 1 @@ -112,10 +139,14 @@ verify_binary_artifacts() { # basename of the artifact pushd $(dirname $artifact) base_artifact=$(basename $artifact) - shasum -a 256 -c $base_artifact.sha256 || exit 1 + if [ -f $base_artifact.sha256 ]; then + shasum -a 256 -c $base_artifact.sha256 || exit 1 + fi shasum -a 512 -c $base_artifact.sha512 || exit 1 popd done + + popd } setup_tempdir() { @@ -143,12 +174,13 @@ setup_miniconda() { . $MINICONDA/etc/profile.d/conda.sh - conda create -n arrow-test -y -q python=3.6 \ + conda create -n arrow-test -y -q -c conda-forge \ + python=3.6 \ nomkl \ numpy \ pandas \ six \ - cython -c conda-forge + cython conda activate arrow-test } @@ -159,18 +191,21 @@ test_and_install_cpp() { pushd cpp/build ARROW_CMAKE_OPTIONS=" +${ARROW_CMAKE_OPTIONS} -DCMAKE_INSTALL_PREFIX=$ARROW_HOME --DCMAKE_INSTALL_LIBDIR=$ARROW_HOME/lib +-DCMAKE_INSTALL_LIBDIR=lib -DARROW_PLASMA=ON -DARROW_ORC=ON -DARROW_PYTHON=ON +-DARROW_GANDIVA=ON -DARROW_PARQUET=ON -DARROW_BOOST_USE_SHARED=ON -DCMAKE_BUILD_TYPE=release +-DARROW_BUILD_TESTS=ON -DARROW_BUILD_BENCHMARKS=ON " - if [ "$ARROW_HAVE_GPU" = "yes" ]; then - ARROW_CMAKE_OPTIONS="$ARROW_CMAKE_OPTIONS -DARROW_GPU=ON" + if [ "$ARROW_HAVE_CUDA" = "yes" ]; then + ARROW_CMAKE_OPTIONS="$ARROW_CMAKE_OPTIONS -DARROW_CUDA=ON" fi cmake $ARROW_CMAKE_OPTIONS .. @@ -189,7 +224,7 @@ test_and_install_cpp() { test_python() { pushd python - pip install -r requirements.txt + pip install -r requirements.txt -r requirements-test.txt python setup.py build_ext --inplace --with-parquet --with-plasma py.test pyarrow -v --pdb @@ -211,8 +246,6 @@ test_glib() { gem install bundler fi - # Workaround for 0.11.0. 0.11.0 doesn't include c_glib/Gemfile. - wget https://raw.githubusercontent.com/apache/arrow/master/c_glib/Gemfile bundle install --path vendor/bundle bundle exec ruby test/run-test.rb @@ -240,17 +273,17 @@ test_js() { test_ruby() { pushd ruby - pushd red-arrow - bundle install --path vendor/bundle - bundle exec ruby test/run-test.rb - popd + local modules="red-arrow red-plasma red-gandiva red-parquet" + if [ "${ARROW_HAVE_CUDA}" = "yes" ]; then + modules="${modules} red-arrow-cuda" + fi - if [ "$ARROW_HAVE_GPU" = "yes" ]; then - pushd red-arrow-gpu + for module in ${modules}; do + pushd ${module} bundle install --path vendor/bundle bundle exec ruby test/run-test.rb popd - fi + done popd } @@ -276,9 +309,7 @@ test_rust() { cargo fmt --all -- --check # raises on any warnings - cargo rustc -- -D warnings - - cargo build + RUSTFLAGS="-D warnings" cargo build cargo test popd @@ -331,21 +362,58 @@ if [ "$ARTIFACT" == "source" ]; then TARBALL=apache-arrow-$1.tar.gz DIST_NAME="apache-arrow-${VERSION}" + # By default test all functionalities. 
+ # To deactivate one test, deactivate the test and all of its dependents + # To explicitly select one test, set TEST_DEFAULT=0 TEST_X=1 + : ${TEST_DEFAULT:=1} + : ${TEST_JAVA:=${TEST_DEFAULT}} + : ${TEST_CPP:=${TEST_DEFAULT}} + : ${TEST_GLIB:=${TEST_DEFAULT}} + : ${TEST_RUBY:=${TEST_DEFAULT}} + : ${TEST_PYTHON:=${TEST_DEFAULT}} + : ${TEST_JS:=${TEST_DEFAULT}} + : ${TEST_INTEGRATION:=${TEST_DEFAULT}} + : ${TEST_RUST:=${TEST_DEFAULT}} + + # Automatically test if it's activated by a dependent + TEST_GLIB=$((${TEST_GLIB} + ${TEST_RUBY})) + TEST_PYTHON=$((${TEST_PYTHON} + ${TEST_INTEGRATION})) + TEST_CPP=$((${TEST_CPP} + ${TEST_GLIB} + ${TEST_PYTHON})) + TEST_JAVA=$((${TEST_JAVA} + ${TEST_INTEGRATION})) + TEST_JS=$((${TEST_JS} + ${TEST_INTEGRATION})) + fetch_archive $DIST_NAME tar xvzf ${DIST_NAME}.tar.gz cd ${DIST_NAME} - test_package_java - setup_miniconda - test_and_install_cpp - test_python - test_glib - test_ruby - test_js - test_integration - test_rust + if [ ${TEST_JAVA} -gt 0 ]; then + test_package_java + fi + if [ ${TEST_CPP} -gt 0 ]; then + setup_miniconda + test_and_install_cpp + fi + if [ ${TEST_PYTHON} -gt 0 ]; then + test_python + fi + if [ ${TEST_GLIB} -gt 0 ]; then + test_glib + fi + if [ ${TEST_RUBY} -gt 0 ]; then + test_ruby + fi + if [ ${TEST_JS} -gt 0 ]; then + test_js + fi + if [ ${TEST_INTEGRATION} -gt 0 ]; then + test_integration + fi + if [ ${TEST_RUST} -gt 0 ]; then + test_rust + fi else - # takes longer on slow network + : ${BINTRAY_REPOSITORY:=apache/arrow} + verify_binary_artifacts fi diff --git a/dev/spark_integration/Dockerfile b/dev/spark_integration/Dockerfile deleted file mode 100644 index 84e353a9b5096..0000000000000 --- a/dev/spark_integration/Dockerfile +++ /dev/null @@ -1,67 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -FROM maven:3.5.2-jdk-8-slim - -# Basic OS utilities -RUN apt-get update \ - && apt-get install -y \ - wget \ - git \ - pkg-config \ - build-essential \ - software-properties-common \ - && apt-get clean - -# install conda in /home/ubuntu/miniconda -RUN wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O conda.sh \ - && /bin/bash conda.sh -b -p /opt/conda \ - && rm conda.sh - -ENV PATH="/opt/conda/bin:${PATH}" - -RUN conda create -y -q -c conda-forge -n pyarrow-dev \ - python=2.7 \ - ipython \ - nomkl \ - numpy \ - six \ - setuptools \ - cython \ - pandas \ - pytest \ - cmake \ - flatbuffers \ - rapidjson \ - boost-cpp \ - thrift-cpp \ - snappy \ - zlib \ - gflags \ - brotli \ - lz4-c \ - zstd \ - setuptools \ - setuptools_scm \ - && conda clean --all - -ADD . /apache-arrow -WORKDIR /apache-arrow - -CMD arrow/dev/spark_integration/spark_integration.sh - -# BUILD: $ docker build -f arrow/dev/spark_integration/Dockerfile -t spark-arrow .
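# Illustration (hypothetical invocation, not part of the patch) of the TEST_*
# toggles added to verify-release-candidate.sh above: to verify only the Ruby
# bindings of a source release candidate, pass the usual version/RC arguments
# and let the dependency arithmetic enable the rest:
#
#   TEST_DEFAULT=0 TEST_RUBY=1 dev/release/verify-release-candidate.sh ...
#
# TEST_RUBY=1 propagates into TEST_GLIB and then TEST_CPP, so the C++
# libraries are still built and installed before the Ruby test suites run.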
-# RUN: $ docker run -v $HOME/.m2:/root/.m2 spark-arrow diff --git a/dev/spark_integration/spark_integration.sh b/dev/spark_integration/spark_integration.sh deleted file mode 100755 index 1f6a2733385f4..0000000000000 --- a/dev/spark_integration/spark_integration.sh +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Exit on any error -set -e - -# Set up environment and working directory -cd /apache-arrow - -# Activate our pyarrow-dev conda env -conda activate pyarrow-dev - -export ARROW_HOME=$(pwd)/arrow -export ARROW_BUILD_TYPE=release -export ARROW_BUILD_TOOLCHAIN=$CONDA_PREFIX -export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} -export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m" - -# Build Arrow C++ -pushd arrow/cpp -rm -rf build/* -mkdir -p build -cd build/ -cmake -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=0" -DARROW_PYTHON=on -DARROW_HDFS=on -DCMAKE_BUILD_TYPE=$ARROW_BUILD_TYPE -DCMAKE_INSTALL_PREFIX=$ARROW_HOME .. -make -j4 -make install -popd - -# Build pyarrow and install inplace -export PYARROW_CXXFLAGS="-D_GLIBCXX_USE_CXX11_ABI=0" -pushd arrow/python -python setup.py clean -python setup.py build_ext --build-type=$ARROW_BUILD_TYPE install -popd - -# Install Arrow to local maven repo and get the version -pushd arrow/java -echo "Building and installing Arrow Java" -mvn -DskipTests -Drat.skip=true clean install -ARROW_VERSION=`mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version | sed -n -e '/^\[.*\]/ !{ /^[0-9]/ { p; q } }'` -echo "Using Arrow version $ARROW_VERSION" -popd - -# Build Spark with Arrow -SPARK_REPO=git://git.apache.org/spark.git -SPARK_BRANCH=master - -# Get the Spark repo if not in image already -if [ ! 
-d "$(pwd)/spark" ]; then - export GIT_COMMITTER_NAME="Nobody" - export GIT_COMMITTER_EMAIL="nobody@nowhere.com" - git clone "$SPARK_REPO" -fi - -pushd spark - -# Make sure branch has no modifications -git checkout "$SPARK_BRANCH" -git reset --hard HEAD - -# Update Spark pom with the Arrow version just installed and build Spark, need package phase for pyspark -sed -i -e "s/\(.*\).*\(<\/arrow.version>\)/\1$ARROW_VERSION\2/g" ./pom.xml -echo "Building Spark with Arrow $ARROW_VERSION" -build/mvn -DskipTests clean package - -# Run Arrow related Scala tests only, NOTE: -Dtest=_NonExist_ is to enable surefire test discovery without running any tests so that Scalatest can run -SPARK_SCALA_TESTS="org.apache.spark.sql.execution.arrow,org.apache.spark.sql.execution.vectorized.ColumnarBatchSuite,org.apache.spark.sql.execution.vectorized.ArrowColumnVectorSuite" -echo "Testing Spark: $SPARK_SCALA_TESTS" -# TODO: should be able to only build spark-sql tests with adding "-pl sql/core" but not currently working -build/mvn -Dtest=none -DwildcardSuites="$SPARK_SCALA_TESTS" test - -# Run pyarrow related Python tests only -SPARK_PYTHON_TESTS="ArrowTests PandasUDFTests ScalarPandasUDFTests GroupedMapPandasUDFTests GroupedAggPandasUDFTests WindowPandasUDFTests" -echo "Testing PySpark: $SPARK_PYTHON_TESTS" -SPARK_TESTING=1 bin/pyspark pyspark.sql.tests $SPARK_PYTHON_TESTS -popd diff --git a/dev/tasks/conda-recipes/appveyor.yml b/dev/tasks/conda-recipes/appveyor.yml index cdc9d97537156..8558aa2662a36 100644 --- a/dev/tasks/conda-recipes/appveyor.yml +++ b/dev/tasks/conda-recipes/appveyor.yml @@ -16,7 +16,7 @@ # under the License. environment: - ARROW_VERSION: {{ arrow.version }} + ARROW_VERSION: {{ arrow.no_rc_version }} # regardless of the python version we build against CONDA_INSTALL_LOCN: C:\Miniconda36-x64 @@ -35,25 +35,26 @@ install: - cmd: set PYTHONUNBUFFERED=1 - # Add our channels. - - cmd: conda.exe config --set show_channel_urls true - - cmd: conda.exe config --remove channels defaults - - cmd: conda.exe config --add channels defaults - - cmd: conda.exe config --add channels conda-forge - # Configure the VM. - - cmd: conda.exe install -n root --quiet --yes conda-forge-ci-setup=1 - - cmd: run_conda_forge_build_setup + - cmd: conda.exe install -n root -c conda-forge --quiet --yes conda-forge-ci-setup=2 conda-build # Skip .NET project specific build phase. build: off test_script: + # Clone arrow - git clone -b {{ arrow.branch }} {{ arrow.remote }} arrow || exit /B - git -C arrow checkout {{ arrow.head }} || exit /B - - pushd arrow\dev\tasks\conda-recipes - - conda.exe build --output-folder . -m {{ variant_config_file }} parquet-cpp arrow-cpp pyarrow + + # Configure conda + - cmd: setup_conda_rc .\ .\ variants\{{ config }}.yaml + - cmd: run_conda_forge_build_setup + + # Build the recipes + - conda.exe build --output-folder . -m variants\{{ config }}.yaml parquet-cpp arrow-cpp pyarrow + + # Rename artifacts - pushd win-64 - for %%f in (*.tar.bz2) do ( set %%g=%%~nf diff --git a/dev/tasks/conda-recipes/arrow-cpp/build.sh b/dev/tasks/conda-recipes/arrow-cpp/build.sh index 3ae3bed389234..3d7dbb74595de 100644 --- a/dev/tasks/conda-recipes/arrow-cpp/build.sh +++ b/dev/tasks/conda-recipes/arrow-cpp/build.sh @@ -40,7 +40,9 @@ cmake \ -DARROW_PYTHON=ON \ -DARROW_PARQUET=ON \ -DARROW_ORC=ON \ + -DCMAKE_AR=${AR} \ + -DCMAKE_RANLIB=${RANLIB} \ + -GNinja \ .. 
-make -j${CPU_COUNT} -make install +ninja install diff --git a/dev/tasks/conda-recipes/arrow-cpp/meta.yaml b/dev/tasks/conda-recipes/arrow-cpp/meta.yaml index 725fd2291e75a..92c2e6b7eeee3 100644 --- a/dev/tasks/conda-recipes/arrow-cpp/meta.yaml +++ b/dev/tasks/conda-recipes/arrow-cpp/meta.yaml @@ -33,6 +33,7 @@ requirements: build: - cmake - autoconf # [unix] + - ninja - {{ compiler('c') }} - {{ compiler('cxx') }} host: @@ -44,6 +45,7 @@ requirements: - rapidjson - zlib - glog + - gflags - snappy - brotli - zstd diff --git a/dev/tasks/conda-recipes/pyarrow/meta.yaml b/dev/tasks/conda-recipes/pyarrow/meta.yaml index 167056ba68e9c..9f6ae79dc64d7 100644 --- a/dev/tasks/conda-recipes/pyarrow/meta.yaml +++ b/dev/tasks/conda-recipes/pyarrow/meta.yaml @@ -33,6 +33,9 @@ requirements: - {{ compiler('c') }} - {{ compiler('cxx') }} host: + # directly pin boost-cpp as we also seem to directly include boost symbols + # in the Python modules. + - boost-cpp - python - setuptools - setuptools_scm @@ -42,6 +45,7 @@ requirements: - arrow-cpp {{ ARROW_VERSION }} run: + - boost-cpp - python - setuptools - {{ pin_compatible('numpy', lower_bound='1.14') }} @@ -58,6 +62,7 @@ test: requires: - pytest + - hypothesis commands: - pytest --pyargs pyarrow diff --git a/dev/tasks/conda-recipes/travis.linux.yml b/dev/tasks/conda-recipes/travis.linux.yml index c0fc71d230a55..d07553584cb8c 100644 --- a/dev/tasks/conda-recipes/travis.linux.yml +++ b/dev/tasks/conda-recipes/travis.linux.yml @@ -25,7 +25,7 @@ if: tag IS blank env: global: - TRAVIS_TAG={{ task.tag }} - - ARROW_VERSION={{ arrow.version }} + - ARROW_VERSION={{ arrow.no_rc_version }} - PYTHONUNBUFFERED=1 install: @@ -39,22 +39,28 @@ install: curl -L -O "${MINICONDA_URL}/${MINICONDA_FILE}" bash $MINICONDA_FILE -b - # Configure conda. + # Install conda build dependency - | echo "" echo "Configuring conda." source /home/travis/miniconda3/bin/activate root - conda config --remove channels defaults - conda config --add channels defaults - conda config --add channels conda-forge - conda config --set show_channel_urls true - conda install --yes --quiet conda-build + conda install -n root -c conda-forge --quiet --yes conda-forge-ci-setup=2 conda-build -script: +before_script: - git clone -b {{ arrow.branch }} {{ arrow.remote }} arrow - git -C arrow checkout {{ arrow.head }} - pushd arrow/dev/tasks/conda-recipes - - conda build --output-folder . -m {{ variant_config_file }} parquet-cpp arrow-cpp pyarrow + + # Configure conda + - setup_conda_rc ./ ./ variants/{{ config }}.yaml + - source run_conda_forge_build_setup + +script: + - | + conda build --croot $TRAVIS_HOME/conda_build_root \ + --output-folder . \ + -m variants/{{ config }}.yaml \ + parquet-cpp arrow-cpp pyarrow deploy: provider: releases diff --git a/dev/tasks/conda-recipes/travis.osx.yml b/dev/tasks/conda-recipes/travis.osx.yml index 193539d8c9f37..99a79b84bcb66 100644 --- a/dev/tasks/conda-recipes/travis.osx.yml +++ b/dev/tasks/conda-recipes/travis.osx.yml @@ -16,7 +16,7 @@ # under the License. os: osx -osx_image: xcode6.4 +osx_image: xcode9.4 language: generic # don't build twice @@ -25,19 +25,9 @@ if: tag IS blank env: global: - TRAVIS_TAG={{ task.tag }} - - ARROW_VERSION={{ arrow.version }} + - ARROW_VERSION={{ arrow.no_rc_version }} - PYTHONUNBUFFERED=1 -before_install: - # Remove homebrew. - - | - echo "" - echo "Removing homebrew from Travis CI to avoid conflicts." 
- curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/uninstall > ~/uninstall_homebrew - chmod +x ~/uninstall_homebrew - ~/uninstall_homebrew -fq - rm ~/uninstall_homebrew - install: # Install Miniconda. - | @@ -47,24 +37,35 @@ install: MINICONDA_FILE="Miniconda3-latest-MacOSX-x86_64.sh" curl -L -O "${MINICONDA_URL}/${MINICONDA_FILE}" bash $MINICONDA_FILE -b - - # Configure conda. + # Install conda build dependency - | echo "" echo "Configuring conda." source /Users/travis/miniconda3/bin/activate root - conda config --remove channels defaults - conda config --add channels defaults - conda config --add channels conda-forge - conda config --set show_channel_urls true - conda install --yes --quiet conda-forge-ci-setup=1 - source run_conda_forge_build_setup + conda install -n root -c conda-forge --quiet --yes conda-forge-ci-setup=2 conda-build -script: + +before_script: - git clone -b {{ arrow.branch }} {{ arrow.remote }} arrow - git -C arrow checkout {{ arrow.head }} - pushd arrow/dev/tasks/conda-recipes - - conda build --output-folder . -m {{ variant_config_file }} parquet-cpp arrow-cpp pyarrow + + # Configure conda + - setup_conda_rc ./ ./ variants/{{ config }}.yaml + + # XXX: workaround, see run_conda_forge_build_setup_osx#L33 + - mkdir -p ./.ci_support + - cp variants/{{ config }}.yaml ./.ci_support/{{ config }}.yaml + - CONFIG={{ config }} source run_conda_forge_build_setup + + # Compiler cleanup + - mangle_compiler ./ ./ ./.ci_support/{{ config }}.yaml + +script: + - | + conda build --output-folder . \ + -m ./.ci_support/{{ config }}.yaml \ + parquet-cpp arrow-cpp pyarrow deploy: provider: releases diff --git a/dev/tasks/conda-recipes/variants/linux_c_compilergcccxx_compilergxxpython2.7.yaml b/dev/tasks/conda-recipes/variants/linux_c_compilergcccxx_compilergxxpython2.7.yaml new file mode 100644 index 0000000000000..149d5fdffcff5 --- /dev/null +++ b/dev/tasks/conda-recipes/variants/linux_c_compilergcccxx_compilergxxpython2.7.yaml @@ -0,0 +1,49 @@ +boost_cpp: +- 1.68.0 +build_number_decrement: +- '0' +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +c_compiler: +- gcc +cxx_compiler: +- gxx +docker_image: +- condaforge/linux-anvil-comp7 +libprotobuf: +- '3.6' +lz4_c: +- 1.8.1 +pin_run_as_build: + boost-cpp: + max_pin: x.x.x + libprotobuf: + max_pin: x.x + lz4-c: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x + snappy: + max_pin: x.x.x + zlib: + max_pin: x.x + zstd: + max_pin: x.x.x +python: +- '2.7' +snappy: +- 1.1.7 +zip_keys: +- - c_compiler + - cxx_compiler + - channel_sources + - channel_targets + - docker_image + - build_number_decrement +zlib: +- '1.2' +zstd: +- 1.3.3 diff --git a/dev/tasks/conda-recipes/variants/linux_c_compilergcccxx_compilergxxpython3.6.yaml b/dev/tasks/conda-recipes/variants/linux_c_compilergcccxx_compilergxxpython3.6.yaml new file mode 100644 index 0000000000000..b71d9de27be0a --- /dev/null +++ b/dev/tasks/conda-recipes/variants/linux_c_compilergcccxx_compilergxxpython3.6.yaml @@ -0,0 +1,49 @@ +boost_cpp: +- 1.68.0 +build_number_decrement: +- '0' +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +c_compiler: +- gcc +cxx_compiler: +- gxx +docker_image: +- condaforge/linux-anvil-comp7 +libprotobuf: +- '3.6' +lz4_c: +- 1.8.1 +pin_run_as_build: + boost-cpp: + max_pin: x.x.x + libprotobuf: + max_pin: x.x + lz4-c: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x + snappy: + max_pin: x.x.x + zlib: + max_pin: x.x + zstd: + max_pin: x.x.x +python: +- '3.6' +snappy: +- 1.1.7 
+zip_keys: +- - c_compiler + - cxx_compiler + - channel_sources + - channel_targets + - docker_image + - build_number_decrement +zlib: +- '1.2' +zstd: +- 1.3.3 diff --git a/dev/tasks/conda-recipes/variants/linux_c_compilergcccxx_compilergxxpython3.7.yaml b/dev/tasks/conda-recipes/variants/linux_c_compilergcccxx_compilergxxpython3.7.yaml new file mode 100644 index 0000000000000..e5dbba5b34b1d --- /dev/null +++ b/dev/tasks/conda-recipes/variants/linux_c_compilergcccxx_compilergxxpython3.7.yaml @@ -0,0 +1,49 @@ +boost_cpp: +- 1.68.0 +build_number_decrement: +- '0' +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +c_compiler: +- gcc +cxx_compiler: +- gxx +docker_image: +- condaforge/linux-anvil-comp7 +libprotobuf: +- '3.6' +lz4_c: +- 1.8.1 +pin_run_as_build: + boost-cpp: + max_pin: x.x.x + libprotobuf: + max_pin: x.x + lz4-c: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x + snappy: + max_pin: x.x.x + zlib: + max_pin: x.x + zstd: + max_pin: x.x.x +python: +- '3.7' +snappy: +- 1.1.7 +zip_keys: +- - c_compiler + - cxx_compiler + - channel_sources + - channel_targets + - docker_image + - build_number_decrement +zlib: +- '1.2' +zstd: +- 1.3.3 diff --git a/dev/tasks/conda-recipes/variants/linux_python2.7.yaml b/dev/tasks/conda-recipes/variants/linux_python2.7.yaml deleted file mode 100644 index 45026b07d60ab..0000000000000 --- a/dev/tasks/conda-recipes/variants/linux_python2.7.yaml +++ /dev/null @@ -1,47 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -boost_cpp: -- 1.67.0 -c_compiler: -- toolchain_c -cxx_compiler: -- toolchain_cxx -lz4_c: -- 1.8.1 -pin_run_as_build: - boost-cpp: - max_pin: x.x.x - lz4-c: - max_pin: x.x.x - python: - min_pin: x.x - max_pin: x.x - snappy: - max_pin: x.x.x - zlib: - max_pin: x.x - zstd: - max_pin: x.x.x -python: -- '2.7' -snappy: -- 1.1.7 -zlib: -- '1.2' -zstd: -- 1.3.3 diff --git a/dev/tasks/conda-recipes/variants/linux_python3.5.yaml b/dev/tasks/conda-recipes/variants/linux_python3.5.yaml deleted file mode 100644 index 683022f834913..0000000000000 --- a/dev/tasks/conda-recipes/variants/linux_python3.5.yaml +++ /dev/null @@ -1,47 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -boost_cpp: -- 1.67.0 -c_compiler: -- toolchain_c -cxx_compiler: -- toolchain_cxx -lz4_c: -- 1.8.1 -pin_run_as_build: - boost-cpp: - max_pin: x.x.x - lz4-c: - max_pin: x.x.x - python: - min_pin: x.x - max_pin: x.x - snappy: - max_pin: x.x.x - zlib: - max_pin: x.x - zstd: - max_pin: x.x.x -python: -- '3.5' -snappy: -- 1.1.7 -zlib: -- '1.2' -zstd: -- 1.3.3 diff --git a/dev/tasks/conda-recipes/variants/linux_python3.6.yaml b/dev/tasks/conda-recipes/variants/linux_python3.6.yaml deleted file mode 100644 index 6b7d8896ac369..0000000000000 --- a/dev/tasks/conda-recipes/variants/linux_python3.6.yaml +++ /dev/null @@ -1,47 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -boost_cpp: -- 1.67.0 -c_compiler: -- toolchain_c -cxx_compiler: -- toolchain_cxx -lz4_c: -- 1.8.1 -pin_run_as_build: - boost-cpp: - max_pin: x.x.x - lz4-c: - max_pin: x.x.x - python: - min_pin: x.x - max_pin: x.x - snappy: - max_pin: x.x.x - zlib: - max_pin: x.x - zstd: - max_pin: x.x.x -python: -- '3.6' -snappy: -- 1.1.7 -zlib: -- '1.2' -zstd: -- 1.3.3 diff --git a/dev/tasks/conda-recipes/variants/osx_c_compilerclangcxx_compilerclangxxpython2.7.yaml b/dev/tasks/conda-recipes/variants/osx_c_compilerclangcxx_compilerclangxxpython2.7.yaml new file mode 100644 index 0000000000000..85d3db6a5a875 --- /dev/null +++ b/dev/tasks/conda-recipes/variants/osx_c_compilerclangcxx_compilerclangxxpython2.7.yaml @@ -0,0 +1,52 @@ +MACOSX_DEPLOYMENT_TARGET: +- '10.9' +boost_cpp: +- 1.68.0 +build_number_decrement: +- '0' +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +c_compiler: +- clang +cxx_compiler: +- clangxx +libprotobuf: +- '3.6' +lz4_c: +- 1.8.1 +macos_machine: +- x86_64-apple-darwin13.4.0 +macos_min_version: +- '10.9' +pin_run_as_build: + boost-cpp: + max_pin: x.x.x + libprotobuf: + max_pin: x.x + lz4-c: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x + snappy: + max_pin: x.x.x + zlib: + max_pin: x.x + zstd: + max_pin: x.x.x +python: +- '2.7' +snappy: +- 1.1.7 +zip_keys: +- - c_compiler + - cxx_compiler + - channel_sources + - channel_targets + - build_number_decrement +zlib: +- '1.2' +zstd: +- 1.3.3 diff --git a/dev/tasks/conda-recipes/variants/osx_c_compilerclangcxx_compilerclangxxpython3.6.yaml b/dev/tasks/conda-recipes/variants/osx_c_compilerclangcxx_compilerclangxxpython3.6.yaml new file mode 100644 index 0000000000000..4fd6bd2b52d82 --- /dev/null +++ b/dev/tasks/conda-recipes/variants/osx_c_compilerclangcxx_compilerclangxxpython3.6.yaml @@ -0,0 +1,52 @@ +MACOSX_DEPLOYMENT_TARGET: +- '10.9' +boost_cpp: +- 1.68.0 +build_number_decrement: +- '0' +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +c_compiler: +- clang +cxx_compiler: +- clangxx +libprotobuf: +- '3.6' +lz4_c: +- 1.8.1 +macos_machine: +- x86_64-apple-darwin13.4.0 +macos_min_version: +- '10.9' +pin_run_as_build: + boost-cpp: + max_pin: x.x.x + libprotobuf: + max_pin: x.x + lz4-c: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x + snappy: + max_pin: x.x.x + zlib: + max_pin: x.x + zstd: + max_pin: x.x.x +python: +- '3.6' +snappy: +- 1.1.7 +zip_keys: +- - c_compiler + - cxx_compiler + - channel_sources + - channel_targets + - build_number_decrement +zlib: +- '1.2' +zstd: +- 1.3.3 diff --git a/dev/tasks/conda-recipes/variants/osx_c_compilerclangcxx_compilerclangxxpython3.7.yaml b/dev/tasks/conda-recipes/variants/osx_c_compilerclangcxx_compilerclangxxpython3.7.yaml new file mode 100644 index 0000000000000..4e4a8df8bd1b5 --- /dev/null +++ b/dev/tasks/conda-recipes/variants/osx_c_compilerclangcxx_compilerclangxxpython3.7.yaml @@ -0,0 +1,52 @@ +MACOSX_DEPLOYMENT_TARGET: +- '10.9' +boost_cpp: +- 1.68.0 +build_number_decrement: +- '0' +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +c_compiler: +- clang +cxx_compiler: +- clangxx +libprotobuf: +- '3.6' +lz4_c: +- 1.8.1 +macos_machine: +- x86_64-apple-darwin13.4.0 +macos_min_version: +- '10.9' +pin_run_as_build: + boost-cpp: + max_pin: x.x.x + libprotobuf: + max_pin: x.x + lz4-c: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x + snappy: + max_pin: x.x.x + zlib: + max_pin: x.x + zstd: + max_pin: x.x.x +python: +- '3.7' +snappy: +- 1.1.7 +zip_keys: +- - c_compiler + - 
cxx_compiler + - channel_sources + - channel_targets + - build_number_decrement +zlib: +- '1.2' +zstd: +- 1.3.3 diff --git a/dev/tasks/conda-recipes/variants/osx_python2.7.yaml b/dev/tasks/conda-recipes/variants/osx_python2.7.yaml deleted file mode 100644 index b8fc15f924dd5..0000000000000 --- a/dev/tasks/conda-recipes/variants/osx_python2.7.yaml +++ /dev/null @@ -1,53 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -MACOSX_DEPLOYMENT_TARGET: -- '10.9' -boost_cpp: -- 1.67.0 -c_compiler: -- toolchain_c -cxx_compiler: -- toolchain_cxx -lz4_c: -- 1.8.1 -macos_machine: -- x86_64-apple-darwin13.4.0 -macos_min_version: -- '10.9' -pin_run_as_build: - boost-cpp: - max_pin: x.x.x - lz4-c: - max_pin: x.x.x - python: - min_pin: x.x - max_pin: x.x - snappy: - max_pin: x.x.x - zlib: - max_pin: x.x - zstd: - max_pin: x.x.x -python: -- '2.7' -snappy: -- 1.1.7 -zlib: -- '1.2' -zstd: -- 1.3.3 diff --git a/dev/tasks/conda-recipes/variants/osx_python3.5.yaml b/dev/tasks/conda-recipes/variants/osx_python3.5.yaml deleted file mode 100644 index 05f7a8dd4d36d..0000000000000 --- a/dev/tasks/conda-recipes/variants/osx_python3.5.yaml +++ /dev/null @@ -1,53 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -MACOSX_DEPLOYMENT_TARGET: -- '10.9' -boost_cpp: -- 1.67.0 -c_compiler: -- toolchain_c -cxx_compiler: -- toolchain_cxx -lz4_c: -- 1.8.1 -macos_machine: -- x86_64-apple-darwin13.4.0 -macos_min_version: -- '10.9' -pin_run_as_build: - boost-cpp: - max_pin: x.x.x - lz4-c: - max_pin: x.x.x - python: - min_pin: x.x - max_pin: x.x - snappy: - max_pin: x.x.x - zlib: - max_pin: x.x - zstd: - max_pin: x.x.x -python: -- '3.5' -snappy: -- 1.1.7 -zlib: -- '1.2' -zstd: -- 1.3.3 diff --git a/dev/tasks/conda-recipes/variants/osx_python3.6.yaml b/dev/tasks/conda-recipes/variants/osx_python3.6.yaml deleted file mode 100644 index 6b7d8896ac369..0000000000000 --- a/dev/tasks/conda-recipes/variants/osx_python3.6.yaml +++ /dev/null @@ -1,47 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -boost_cpp: -- 1.67.0 -c_compiler: -- toolchain_c -cxx_compiler: -- toolchain_cxx -lz4_c: -- 1.8.1 -pin_run_as_build: - boost-cpp: - max_pin: x.x.x - lz4-c: - max_pin: x.x.x - python: - min_pin: x.x - max_pin: x.x - snappy: - max_pin: x.x.x - zlib: - max_pin: x.x - zstd: - max_pin: x.x.x -python: -- '3.6' -snappy: -- 1.1.7 -zlib: -- '1.2' -zstd: -- 1.3.3 diff --git a/dev/tasks/conda-recipes/variants/win_c_compilervs2015cxx_compilervs2015python3.5.yaml b/dev/tasks/conda-recipes/variants/win_c_compilervs2015cxx_compilervs2015python3.5.yaml deleted file mode 100644 index d886b0e39ff7f..0000000000000 --- a/dev/tasks/conda-recipes/variants/win_c_compilervs2015cxx_compilervs2015python3.5.yaml +++ /dev/null @@ -1,51 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -boost_cpp: -- 1.67.0 -c_compiler: -- vs2015 -cxx_compiler: -- vs2015 -lz4_c: -- 1.8.1 -pin_run_as_build: - boost-cpp: - max_pin: x.x.x - lz4-c: - max_pin: x.x.x - python: - min_pin: x.x - max_pin: x.x - snappy: - max_pin: x.x.x - zlib: - max_pin: x.x - zstd: - max_pin: x.x.x -python: -- '3.5' -snappy: -- 1.1.7 -zip_keys: -- - python - - c_compiler - - cxx_compiler -zlib: -- '1.2' -zstd: -- 1.3.3 diff --git a/dev/tasks/conda-recipes/variants/win_c_compilervs2015cxx_compilervs2015python3.6.yaml b/dev/tasks/conda-recipes/variants/win_c_compilervs2015cxx_compilervs2015python3.6.yaml index 880642f5b7d85..5a57d02fd0ea3 100644 --- a/dev/tasks/conda-recipes/variants/win_c_compilervs2015cxx_compilervs2015python3.6.yaml +++ b/dev/tasks/conda-recipes/variants/win_c_compilervs2015cxx_compilervs2015python3.6.yaml @@ -1,31 +1,22 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
 boost_cpp:
-- 1.67.0
+- 1.68.0
 c_compiler:
 - vs2015
+channel_sources:
+- conda-forge,defaults
+channel_targets:
+- conda-forge main
 cxx_compiler:
 - vs2015
+libprotobuf:
+- '3.6'
 lz4_c:
 - 1.8.1
 pin_run_as_build:
   boost-cpp:
     max_pin: x.x.x
+  libprotobuf:
+    max_pin: x.x
   lz4-c:
     max_pin: x.x.x
   python:
diff --git a/dev/tasks/conda-recipes/variants/win_c_compilervs2015cxx_compilervs2015python3.7.yaml b/dev/tasks/conda-recipes/variants/win_c_compilervs2015cxx_compilervs2015python3.7.yaml
new file mode 100644
index 0000000000000..06bd37d5ea7c6
--- /dev/null
+++ b/dev/tasks/conda-recipes/variants/win_c_compilervs2015cxx_compilervs2015python3.7.yaml
@@ -0,0 +1,42 @@
+boost_cpp:
+- 1.68.0
+c_compiler:
+- vs2015
+channel_sources:
+- conda-forge,defaults
+channel_targets:
+- conda-forge main
+cxx_compiler:
+- vs2015
+libprotobuf:
+- '3.6'
+lz4_c:
+- 1.8.1
+pin_run_as_build:
+  boost-cpp:
+    max_pin: x.x.x
+  libprotobuf:
+    max_pin: x.x
+  lz4-c:
+    max_pin: x.x.x
+  python:
+    min_pin: x.x
+    max_pin: x.x
+  snappy:
+    max_pin: x.x.x
+  zlib:
+    max_pin: x.x
+  zstd:
+    max_pin: x.x.x
+python:
+- '3.7'
+snappy:
+- 1.1.7
+zip_keys:
+- - python
+  - c_compiler
+  - cxx_compiler
+zlib:
+- '1.2'
+zstd:
+- 1.3.3
diff --git a/dev/tasks/crossbow.py b/dev/tasks/crossbow.py
index 74facf4b7fa01..2d0c53089d056 100755
--- a/dev/tasks/crossbow.py
+++ b/dev/tasks/crossbow.py
@@ -396,11 +396,13 @@ def __init__(self, head, branch, remote, version, email=None):
         self.branch = branch
         self.remote = remote
         self.version = version
+        self.no_rc_version = re.sub(r'-rc\d+\Z', '', version)

     @classmethod
-    def from_repo(cls, repo):
+    def from_repo(cls, repo, version=None):
         assert isinstance(repo, Repo)
-        version = get_version(repo.path, local_scheme=lambda v: '')
+        if version is None:
+            version = get_version(repo.path, local_scheme=lambda v: '')
         return cls(head=str(repo.head.target),
                    email=repo.email,
                    branch=repo.branch.branch_name,
@@ -587,22 +589,52 @@ def load_tasks_from_config(config_path, task_names, group_names):
              help='Task configuration yml. Defaults to tasks.yml')
 @click.option('--arrow-version', '-v', default=None,
               help='Set target version explicitly')
+@click.option('--arrow-repo', '-r', default=None,
+              help='Set GitHub repo name explicitly, e.g. apache/arrow or '
+                   'kszucs/arrow; this repository is going to be cloned on '
+                   'the CI services. Note that no validation happens '
+                   'locally, so --arrow-branch and --arrow-sha may need to '
+                   'be defined as well.')
+@click.option('--arrow-branch', '-b', default='master',
+              help='Give the branch name explicitly, e.g. master, '
+                   'ARROW-1949. Only available if --arrow-repo is set.')
+@click.option('--arrow-sha', '-t', default='HEAD',
+              help='Set commit SHA or tag name explicitly, e.g. f67a515, '
+                   'apache-arrow-0.11.1. Only available if both --arrow-repo and '
+                   '--arrow-branch are set.')
 @click.option('--dry-run/--push', default=False,
               help='Just display the rendered CI configurations without '
                    'submitting them')
 @click.pass_context
-def submit(ctx, task, group, job_prefix, config_path, arrow_version, dry_run):
+def submit(ctx, task, group, job_prefix, config_path, arrow_version,
+           arrow_repo, arrow_branch, arrow_sha, dry_run):
     queue, arrow = ctx.obj['queue'], ctx.obj['arrow']
-    target = Target.from_repo(arrow)

-    # explicitly set arrow version
-    if arrow_version:
-        target.version = arrow_version
+    if arrow_repo is not None:
+        values = {'version': arrow_version,
+                  'branch': arrow_branch,
+                  'sha': arrow_sha}
+        for k, v in values.items():
+            if not v:
+                raise ValueError('Must pass --arrow-{} argument'.format(k))
+
+        # Set repo URL, branch and sha explicitly - this aims to make the
+        # release procedure a bit simpler.
+        # Note that the target revision's crossbow templates must be
+        # compatible with the locally checked out version of crossbow
+        # (which is the case during the release procedure), because the
+        # templates still contain some business logic (dependency
+        # installation, deployments) which will be reduced to a single
+        # command in the future.
+        remote = 'https://github.com/{}'.format(arrow_repo)
+        target = Target(head=arrow_sha, branch=arrow_branch, remote=remote,
+                        version=arrow_version)
+    else:
+        # Instantiate the target from the locally checked out repository
+        # and branch.
+        target = Target.from_repo(arrow, version=arrow_version)

-    no_rc_version = re.sub(r'-rc\d+\Z', '', target.version)
     params = {
         'version': target.version,
-        'no_rc_version': no_rc_version,
+        'no_rc_version': target.no_rc_version,
     }

     # task and group variables are lists, containing multiple values
diff --git a/dev/tasks/gandiva-jars/build-cpp.sh b/dev/tasks/gandiva-jars/build-cpp.sh
index a0538cf6f3116..5f0cef3ee6cd0 100755
--- a/dev/tasks/gandiva-jars/build-cpp.sh
+++ b/dev/tasks/gandiva-jars/build-cpp.sh
@@ -27,8 +27,11 @@ pushd arrow/cpp
   pushd build
     cmake -DCMAKE_BUILD_TYPE=Release \
           -DARROW_GANDIVA=ON \
+          -DARROW_GANDIVA_JAVA=ON \
           -DARROW_GANDIVA_STATIC_LIBSTDCPP=ON \
+          -DARROW_BUILD_TESTS=ON \
           -DARROW_BUILD_UTILITIES=OFF \
+          -DARROW_BOOST_USE_SHARED=OFF \
           ..
make -j4 ctest diff --git a/dev/tasks/gandiva-jars/build-java.sh b/dev/tasks/gandiva-jars/build-java.sh index a9b0bfe8eafdd..d099035a3e220 100755 --- a/dev/tasks/gandiva-jars/build-java.sh +++ b/dev/tasks/gandiva-jars/build-java.sh @@ -19,11 +19,19 @@ set -e +source arrow/ci/travis_env_common.sh + +CPP_BUILD_DIR=$TRAVIS_BUILD_DIR/cpp/build/release + pushd arrow/java + if [ $TRAVIS_OS_NAME == "linux" ]; then + ldd $CPP_BUILD_DIR/libgandiva_jni.so + fi + # build the entire project - mvn clean install -DskipTests -P gandiva -Dgandiva.cpp.build.dir=../../cpp/build/release + mvn clean install -DskipTests -P gandiva -Dgandiva.cpp.build.dir=$CPP_BUILD_DIR # test only gandiva - mvn test -P gandiva -pl gandiva -Dgandiva.cpp.build.dir=../../cpp/build/release + mvn test -P gandiva -pl gandiva -Dgandiva.cpp.build.dir=$CPP_BUILD_DIR # copy the jars to distribution folder find gandiva/target/ -name "*.jar" -not -name "*tests*" -exec cp {} ../../dist/ \; popd diff --git a/dev/tasks/gandiva-jars/travis.linux.yml b/dev/tasks/gandiva-jars/travis.linux.yml index 8b311ca962e7b..8526b48a54346 100644 --- a/dev/tasks/gandiva-jars/travis.linux.yml +++ b/dev/tasks/gandiva-jars/travis.linux.yml @@ -32,6 +32,7 @@ env: - ARROW_TRAVIS_USE_TOOLCHAIN=1 before_install: + # gcc 4.9 is required for the static linking of libstdc++ - export CC="gcc-4.9" CXX="g++-4.9" - ulimit -c unlimited -S - | @@ -42,9 +43,9 @@ before_install: before_script: - git clone -b {{ arrow.branch }} {{ arrow.remote }} arrow - git -C arrow checkout {{ arrow.head }} + - export TRAVIS_BUILD_DIR=$TRAVIS_BUILD_DIR/arrow - arrow/ci/travis_install_linux.sh - arrow/ci/travis_install_clang_tools.sh - - export TRAVIS_BUILD_DIR=$TRAVIS_BUILD_DIR/arrow - arrow/ci/travis_install_toolchain.sh script: diff --git a/dev/tasks/linux-packages/apt/debian-stretch/Dockerfile b/dev/tasks/linux-packages/apt/debian-stretch/Dockerfile index 4dde574cbf95d..70cefaabf262e 100644 --- a/dev/tasks/linux-packages/apt/debian-stretch/Dockerfile +++ b/dev/tasks/linux-packages/apt/debian-stretch/Dockerfile @@ -22,6 +22,9 @@ ENV DEBIAN_FRONTEND noninteractive ARG DEBUG RUN sed -i'' -e 's/main$/main contrib non-free/g' /etc/apt/sources.list +RUN \ + echo "deb http://deb.debian.org/debian stretch-backports main" > \ + /etc/apt/sources.list.d/backports.list RUN \ quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \ @@ -30,6 +33,7 @@ RUN \ autoconf-archive \ bison \ build-essential \ + clang-6.0 \ cmake \ debhelper\ devscripts \ diff --git a/dev/tasks/linux-packages/apt/ubuntu-bionic/Dockerfile b/dev/tasks/linux-packages/apt/ubuntu-bionic/Dockerfile index 5d3c9ba2932ed..68de4d569a663 100644 --- a/dev/tasks/linux-packages/apt/ubuntu-bionic/Dockerfile +++ b/dev/tasks/linux-packages/apt/ubuntu-bionic/Dockerfile @@ -28,6 +28,7 @@ RUN \ autoconf-archive \ bison \ build-essential \ + clang-6.0 \ cmake \ debhelper\ devscripts \ diff --git a/dev/tasks/linux-packages/apt/ubuntu-cosmic/Dockerfile b/dev/tasks/linux-packages/apt/ubuntu-cosmic/Dockerfile index 7840e02e54b5b..0d871eaa2635d 100644 --- a/dev/tasks/linux-packages/apt/ubuntu-cosmic/Dockerfile +++ b/dev/tasks/linux-packages/apt/ubuntu-cosmic/Dockerfile @@ -26,10 +26,13 @@ RUN \ apt update ${quiet} && \ apt install -y -V ${quiet} \ autoconf-archive \ + bison \ build-essential \ + clang-6.0 \ cmake \ debhelper\ devscripts \ + flex \ git \ gtk-doc-tools \ libboost-filesystem-dev \ diff --git a/dev/tasks/linux-packages/apt/ubuntu-xenial/Dockerfile b/dev/tasks/linux-packages/apt/ubuntu-xenial/Dockerfile index 17cb27713f08c..c7c5b1e09ece1 100644 
--- a/dev/tasks/linux-packages/apt/ubuntu-xenial/Dockerfile +++ b/dev/tasks/linux-packages/apt/ubuntu-xenial/Dockerfile @@ -28,6 +28,7 @@ RUN \ autoconf-archive \ bison \ build-essential \ + clang-6.0 \ cmake \ debhelper\ devscripts \ diff --git a/dev/tasks/linux-packages/debian.ubuntu-trusty/changelog b/dev/tasks/linux-packages/debian.ubuntu-trusty/changelog index e54e05c89a0dd..0aa6dd85a702b 100644 --- a/dev/tasks/linux-packages/debian.ubuntu-trusty/changelog +++ b/dev/tasks/linux-packages/debian.ubuntu-trusty/changelog @@ -1,3 +1,9 @@ +apache-arrow (0.12.0-1) unstable; urgency=low + + * New upstream release. + + -- Krisztián Szűcs Wed, 16 Jan 2019 03:29:25 -0000 + apache-arrow (0.11.0-1) unstable; urgency=low * New upstream release. diff --git a/dev/tasks/linux-packages/debian.ubuntu-trusty/control b/dev/tasks/linux-packages/debian.ubuntu-trusty/control index 696f2c4b696bb..9fd699bbb55f2 100644 --- a/dev/tasks/linux-packages/debian.ubuntu-trusty/control +++ b/dev/tasks/linux-packages/debian.ubuntu-trusty/control @@ -20,7 +20,7 @@ Build-Depends-Indep: libglib2.0-doc Standards-Version: 3.9.6 Homepage: https://arrow.apache.org/ -Package: libarrow12 +Package: libarrow13 Section: libs Architecture: any Multi-Arch: same @@ -30,7 +30,7 @@ Depends: ${shlibs:Depends} Description: Apache Arrow is a data processing library for analysis . - This package provides library files. + This package provides C++ library files. Package: libarrow-dev Section: libdevel @@ -38,12 +38,49 @@ Architecture: any Multi-Arch: same Depends: ${misc:Depends}, - libarrow12 (= ${binary:Version}) + libarrow13 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . - This package provides header files. + This package provides C++ header files. + +Package: libplasma13 +Section: libs +Architecture: any +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libarrow13 (= ${binary:Version}) +Description: Plasma is an in-memory object store and cache for big data. + . + This package provides C++ library files to connect plasma_store_server. + +Package: plasma-store-server +Section: utils +Architecture: any +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libplasma13 (= ${binary:Version}) +Description: Plasma is an in-memory object store and cache for big data. + . + This package provides plasma_store_server. + +Package: libplasma-dev +Section: libdevel +Architecture: any +Multi-Arch: same +Depends: + ${misc:Depends}, + libarrow-dev (= ${binary:Version}), + libplasma13 (= ${binary:Version}) +Description: Plasma is an in-memory object store and cache for big data. + . + This package provides C++ header files. -Package: libparquet12 +Package: libparquet13 Section: libs Architecture: any Multi-Arch: same @@ -62,12 +99,12 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libparquet12 (= ${binary:Version}) + libparquet13 (= ${binary:Version}) Description: Apache Parquet is a columnar storage format . This package provides C++ header files. -Package: libarrow-glib12 +Package: libarrow-glib13 Section: libs Architecture: any Multi-Arch: same @@ -75,10 +112,10 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow12 (= ${binary:Version}) + libarrow13 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . - This package provides library files. + This package provides GLib based library files. 
Package: gir1.2-arrow-1.0 Section: introspection @@ -99,12 +136,12 @@ Depends: ${misc:Depends}, libglib2.0-dev, libarrow-dev (= ${binary:Version}), - libarrow-glib12 (= ${binary:Version}), + libarrow-glib13 (= ${binary:Version}), gir1.2-arrow-1.0 (= ${binary:Version}) Suggests: libarrow-glib-doc Description: Apache Arrow is a data processing library for analysis . - This package provides header files. + This package provides GLib based header files. Package: libarrow-glib-doc Section: doc @@ -117,7 +154,57 @@ Description: Apache Arrow is a data processing library for analysis . This package provides documentations. -Package: libparquet-glib12 +Package: libplasma-glib13 +Section: libs +Architecture: any +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libarrow-glib13 (= ${binary:Version}), + libplasma13 (= ${binary:Version}) +Description: Plasma is an in-memory object store and cache for big data. + . + This package provides GLib based library files to connect plasma_store_server. + +Package: gir1.2-plasma-1.0 +Section: introspection +Architecture: any +Multi-Arch: same +Depends: + ${gir:Depends}, + ${misc:Depends} +Description: Plasma is an in-memory object store and cache for big data. + . + This package provides GObject Introspection typelib files. + +Package: libplasma-glib-dev +Section: libdevel +Architecture: any +Multi-Arch: same +Depends: + ${misc:Depends}, + libplasma-dev (= ${binary:Version}), + libarrow-glib-dev (= ${binary:Version}), + libplasma-glib13 (= ${binary:Version}), + gir1.2-plasma-1.0 (= ${binary:Version}) +Description: Plasma is an in-memory object store and cache for big data. + . + This package provides GLib based header files. + +Package: libplasma-glib-doc +Section: doc +Architecture: all +Multi-Arch: foreign +Depends: + ${misc:Depends} +Recommends: libglib2.0-doc +Description: Plasma is an in-memory object store and cache for big data. + . + This package provides documentations. + +Package: libparquet-glib13 Section: libs Architecture: any Multi-Arch: same @@ -125,8 +212,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib12 (= ${binary:Version}), - libparquet12 (= ${binary:Version}) + libarrow-glib13 (= ${binary:Version}), + libparquet13 (= ${binary:Version}) Description: Apache Parquet is a columnar storage format . This package provides GLib based library files. 
@@ -150,7 +237,7 @@ Depends: ${misc:Depends}, libarrow-glib-dev (= ${binary:Version}), libparquet-dev (= ${binary:Version}), - libparquet-glib12 (= ${binary:Version}), + libparquet-glib13 (= ${binary:Version}), gir1.2-parquet-1.0 (= ${binary:Version}) Suggests: libparquet-glib-doc Description: Apache Parquet is a columnar storage format diff --git a/dev/tasks/linux-packages/debian.ubuntu-trusty/gir1.2-plasma-1.0.install b/dev/tasks/linux-packages/debian.ubuntu-trusty/gir1.2-plasma-1.0.install new file mode 100644 index 0000000000000..4366f4f1f5a25 --- /dev/null +++ b/dev/tasks/linux-packages/debian.ubuntu-trusty/gir1.2-plasma-1.0.install @@ -0,0 +1 @@ +usr/lib/girepository-1.0/Plasma-1.0.typelib diff --git a/dev/tasks/linux-packages/debian.ubuntu-trusty/libarrow-glib12.install b/dev/tasks/linux-packages/debian.ubuntu-trusty/libarrow-glib13.install similarity index 100% rename from dev/tasks/linux-packages/debian.ubuntu-trusty/libarrow-glib12.install rename to dev/tasks/linux-packages/debian.ubuntu-trusty/libarrow-glib13.install diff --git a/dev/tasks/linux-packages/debian.ubuntu-trusty/libarrow-python12.install b/dev/tasks/linux-packages/debian.ubuntu-trusty/libarrow-python13.install similarity index 100% rename from dev/tasks/linux-packages/debian.ubuntu-trusty/libarrow-python12.install rename to dev/tasks/linux-packages/debian.ubuntu-trusty/libarrow-python13.install diff --git a/dev/tasks/linux-packages/debian.ubuntu-trusty/libarrow12.install b/dev/tasks/linux-packages/debian.ubuntu-trusty/libarrow13.install similarity index 100% rename from dev/tasks/linux-packages/debian.ubuntu-trusty/libarrow12.install rename to dev/tasks/linux-packages/debian.ubuntu-trusty/libarrow13.install diff --git a/dev/tasks/linux-packages/debian.ubuntu-trusty/libparquet-glib12.install b/dev/tasks/linux-packages/debian.ubuntu-trusty/libparquet-glib13.install similarity index 100% rename from dev/tasks/linux-packages/debian.ubuntu-trusty/libparquet-glib12.install rename to dev/tasks/linux-packages/debian.ubuntu-trusty/libparquet-glib13.install diff --git a/dev/tasks/linux-packages/debian.ubuntu-trusty/libparquet12.install b/dev/tasks/linux-packages/debian.ubuntu-trusty/libparquet13.install similarity index 100% rename from dev/tasks/linux-packages/debian.ubuntu-trusty/libparquet12.install rename to dev/tasks/linux-packages/debian.ubuntu-trusty/libparquet13.install diff --git a/dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma-dev.install b/dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma-dev.install new file mode 100644 index 0000000000000..d3538d2210af3 --- /dev/null +++ b/dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma-dev.install @@ -0,0 +1,3 @@ +usr/lib/*/libplasma.a +usr/lib/*/libplasma.so +usr/lib/*/pkgconfig/plasma.pc diff --git a/dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma-glib-dev.install b/dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma-glib-dev.install new file mode 100644 index 0000000000000..f21a9aa8a8f9c --- /dev/null +++ b/dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma-glib-dev.install @@ -0,0 +1,5 @@ +usr/include/plasma-glib/ +usr/lib/*/libplasma-glib.a +usr/lib/*/libplasma-glib.so +usr/lib/*/pkgconfig/plasma-glib.pc +usr/share/gir-1.0/Plasma-1.0.gir diff --git a/dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma-glib-doc.doc-base b/dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma-glib-doc.doc-base new file mode 100644 index 0000000000000..7863d7d07a36c --- /dev/null +++ 
b/dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma-glib-doc.doc-base @@ -0,0 +1,9 @@ +Document: plasma-glib +Title: Plasma GLib Reference Manual +Author: The Apache Software Foundation +Abstract: Plasma GLib is an in-memory object store and cache for big data that uses GLib. +Section: Programming + +Format: HTML +Index: /usr/share/doc/libarrow-glib-doc/plasma-glib/index.html +Files: /usr/share/doc/libarrow-glib-doc/plasma-glib/*.html diff --git a/dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma-glib-doc.install b/dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma-glib-doc.install new file mode 100644 index 0000000000000..ef5a63b340c4e --- /dev/null +++ b/dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma-glib-doc.install @@ -0,0 +1 @@ +usr/share/doc/libarrow-glib-doc/plasma-glib/ diff --git a/dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma-glib-doc.links b/dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma-glib-doc.links new file mode 100644 index 0000000000000..baea0ef4f4b78 --- /dev/null +++ b/dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma-glib-doc.links @@ -0,0 +1,3 @@ +usr/share/doc/libglib2.0-doc/glib usr/share/doc/libplasma-glib-doc/glib +usr/share/doc/libglib2.0-doc/gobject usr/share/doc/libplasma-glib-doc/gobject +usr/share/doc/libarrow-glib-doc/plasma-glib usr/share/gtk-doc/html/plasma-glib diff --git a/dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma-glib13.install b/dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma-glib13.install new file mode 100644 index 0000000000000..339bcca3e7278 --- /dev/null +++ b/dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma-glib13.install @@ -0,0 +1 @@ +usr/lib/*/libplasma-glib.so.* diff --git a/dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma13.install b/dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma13.install new file mode 100644 index 0000000000000..f8a744b65975d --- /dev/null +++ b/dev/tasks/linux-packages/debian.ubuntu-trusty/libplasma13.install @@ -0,0 +1 @@ +usr/lib/*/libplasma.so.* diff --git a/dev/tasks/linux-packages/debian.ubuntu-trusty/plasma-store-server.install b/dev/tasks/linux-packages/debian.ubuntu-trusty/plasma-store-server.install new file mode 100644 index 0000000000000..9c38179c17dc1 --- /dev/null +++ b/dev/tasks/linux-packages/debian.ubuntu-trusty/plasma-store-server.install @@ -0,0 +1 @@ +usr/bin/plasma_store_server diff --git a/dev/tasks/linux-packages/debian.ubuntu-trusty/rules b/dev/tasks/linux-packages/debian.ubuntu-trusty/rules index 01956fec40a9d..4eb26772df00c 100755 --- a/dev/tasks/linux-packages/debian.ubuntu-trusty/rules +++ b/dev/tasks/linux-packages/debian.ubuntu-trusty/rules @@ -22,9 +22,9 @@ override_dh_auto_configure: --builddirectory=cpp_build \ -- \ -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \ - -DARROW_BUILD_TESTS=OFF \ -DARROW_ORC=ON \ - -DARROW_PARQUET=ON + -DARROW_PARQUET=ON \ + -DARROW_PLASMA=ON dh_auto_configure \ --sourcedirectory=c_glib \ --builddirectory=c_glib_build \ diff --git a/dev/tasks/linux-packages/debian/changelog b/dev/tasks/linux-packages/debian/changelog index a0aff6fb28a53..c3a1d58c846d7 100644 --- a/dev/tasks/linux-packages/debian/changelog +++ b/dev/tasks/linux-packages/debian/changelog @@ -1,3 +1,9 @@ +apache-arrow (0.12.0-1) unstable; urgency=low + + * New upstream release. + + -- Krisztián Szűcs Wed, 16 Jan 2019 03:29:25 -0000 + apache-arrow (0.11.0-1) unstable; urgency=low * New upstream release. 
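The 0.12.0 changelog entries above pair with the crossbow.py change earlier in this patch: Target now derives no_rc_version once, by stripping a trailing -rcN marker, so artifacts built from a release-candidate tag are still named after the final release version. A minimal sketch of that derivation, reusing the regex from the diff (the standalone helper name is illustrative, not part of crossbow.py):

    import re

    def no_rc_version(version):
        # Strip a trailing release-candidate suffix: '0.12.0-rc2' -> '0.12.0'.
        # Versions without an '-rcN' suffix pass through unchanged.
        return re.sub(r'-rc\d+\Z', '', version)

    assert no_rc_version('0.12.0-rc2') == '0.12.0'
    assert no_rc_version('0.12.0') == '0.12.0'
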
diff --git a/dev/tasks/linux-packages/debian/control b/dev/tasks/linux-packages/debian/control index d497a31d1443a..6aa5b551aa8d4 100644 --- a/dev/tasks/linux-packages/debian/control +++ b/dev/tasks/linux-packages/debian/control @@ -5,6 +5,7 @@ Maintainer: Kouhei Sutou Build-Depends: autoconf-archive, bison, + clang-6.0, cmake, debhelper (>= 9.20160115), dh-autoreconf, @@ -27,7 +28,7 @@ Build-Depends-Indep: libglib2.0-doc Standards-Version: 3.9.6 Homepage: https://arrow.apache.org/ -Package: libarrow12 +Package: libarrow13 Section: libs Architecture: any Multi-Arch: same @@ -37,9 +38,9 @@ Depends: ${shlibs:Depends} Description: Apache Arrow is a data processing library for analysis . - This package provides library files. + This package provides C++ library files. -Package: libarrow-python12 +Package: libarrow-python13 Section: libs Architecture: any Multi-Arch: same @@ -47,14 +48,14 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow12 (= ${binary:Version}), + libarrow13 (= ${binary:Version}), python3, python3-numpy Description: Apache Arrow is a data processing library for analysis . - This package provides library files for Python support. + This package provides C++ library files for Python support. -Package: libarrow-gpu12 +Package: libarrow-cuda13 Section: libs Architecture: any Multi-Arch: same @@ -62,10 +63,10 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow12 (= ${binary:Version}) + libarrow13 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . - This package provides library files for GPU support. + This package provides C++ library files for CUDA support. Package: libarrow-dev Section: libdevel @@ -73,10 +74,10 @@ Architecture: any Multi-Arch: same Depends: ${misc:Depends}, - libarrow12 (= ${binary:Version}) + libarrow13 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . - This package provides header files. + This package provides C++ header files. Package: libarrow-python-dev Section: libdevel @@ -85,24 +86,88 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libarrow-python12 (= ${binary:Version}) + libarrow-python13 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . - This package provides header files for Python support. + This package provides C++ header files for Python support. -Package: libarrow-gpu-dev +Package: libarrow-cuda-dev Section: libdevel Architecture: any Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libarrow-gpu12 (= ${binary:Version}) + libarrow-cuda13 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . - This package provides header files for GPU support. + This package provides C++ header files for CUDA support. + +Package: libgandiva13 +Section: libs +Architecture: any +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libarrow13 (= ${binary:Version}) +Description: Gandiva is a toolset for compiling and evaluating expressions + on Arrow Data. + . + This package provides C++ library files. + +Package: libgandiva-dev +Section: libdevel +Architecture: any +Multi-Arch: same +Depends: + ${misc:Depends}, + libarrow-dev (= ${binary:Version}), + libgandiva13 (= ${binary:Version}) +Description: Gandiva is a toolset for compiling and evaluating expressions + on Arrow Data. + . 
+ This package provides C++ header files. -Package: libparquet12 +Package: libplasma13 +Section: libs +Architecture: any +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libarrow-cuda13 (= ${binary:Version}) +Description: Plasma is an in-memory object store and cache for big data. + . + This package provides C++ library files to connect plasma_store_server. + +Package: plasma-store-server +Section: utils +Architecture: any +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libplasma13 (= ${binary:Version}) +Description: Plasma is an in-memory object store and cache for big data. + . + This package provides plasma_store_server. + +Package: libplasma-dev +Section: libdevel +Architecture: any +Multi-Arch: same +Depends: + ${misc:Depends}, + libarrow-cuda-dev (= ${binary:Version}), + libplasma13 (= ${binary:Version}) +Description: Plasma is an in-memory object store and cache for big data. + . + This package provides C++ header files. + +Package: libparquet13 Section: libs Architecture: any Multi-Arch: same @@ -121,12 +186,12 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libparquet12 (= ${binary:Version}) + libparquet13 (= ${binary:Version}) Description: Apache Parquet is a columnar storage format . This package provides C++ header files. -Package: libarrow-glib12 +Package: libarrow-glib13 Section: libs Architecture: any Multi-Arch: same @@ -134,10 +199,10 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow12 (= ${binary:Version}) + libarrow13 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . - This package provides library files. + This package provides GLib based library files. Package: gir1.2-arrow-1.0 Section: introspection @@ -158,12 +223,12 @@ Depends: ${misc:Depends}, libglib2.0-dev, libarrow-dev (= ${binary:Version}), - libarrow-glib12 (= ${binary:Version}), + libarrow-glib13 (= ${binary:Version}), gir1.2-arrow-1.0 (= ${binary:Version}) Suggests: libarrow-glib-doc Description: Apache Arrow is a data processing library for analysis . - This package provides header files. + This package provides GLib based header files. Package: libarrow-glib-doc Section: doc @@ -176,7 +241,7 @@ Description: Apache Arrow is a data processing library for analysis . This package provides documentations. -Package: libarrow-gpu-glib12 +Package: libarrow-cuda-glib13 Section: libs Architecture: any Multi-Arch: same @@ -184,13 +249,13 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib12 (= ${binary:Version}), - libarrow-gpu12 (= ${binary:Version}) + libarrow-glib13 (= ${binary:Version}), + libarrow-cuda13 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . - This package provides library files for GPU support. + This package provides GLib based library files for CUDA support. -Package: gir1.2-arrow-gpu-1.0 +Package: gir1.2-arrow-cuda-1.0 Section: introspection Architecture: any Multi-Arch: same @@ -199,24 +264,127 @@ Depends: ${misc:Depends} Description: Apache Arrow is a data processing library for analysis . - This package provides GObject Introspection typelib files for GPU support. + This package provides GObject Introspection typelib files for CUDA support. 
-Package: libarrow-gpu-glib-dev +Package: libarrow-cuda-glib-dev Section: libdevel Architecture: any Multi-Arch: same Depends: ${misc:Depends}, - libarrow-dev (= ${binary:Version}), + libarrow-cuda-dev (= ${binary:Version}), libarrow-glib-dev (= ${binary:Version}), - libarrow-gpu-dev (= ${binary:Version}), - libarrow-gpu-glib12 (= ${binary:Version}), - gir1.2-arrow-gpu-1.0 (= ${binary:Version}) + libarrow-cuda-glib13 (= ${binary:Version}), + gir1.2-arrow-cuda-1.0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . - This package provides header files for GPU support. + This package provides GLib based header files for CUDA support. + +Package: libgandiva-glib13 +Section: libs +Architecture: any +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libarrow-glib13 (= ${binary:Version}), + libgandiva13 (= ${binary:Version}) +Description: Gandiva is a toolset for compiling and evaluating expressions + on Arrow Data. + . + This package provides GLib based library files. + +Package: gir1.2-gandiva-1.0 +Section: introspection +Architecture: any +Multi-Arch: same +Depends: + ${gir:Depends}, + ${misc:Depends} +Description: Gandiva is a toolset for compiling and evaluating expressions + on Arrow Data. + . + This package provides GObject Introspection typelib files. + +Package: libgandiva-glib-dev +Section: libdevel +Architecture: any +Multi-Arch: same +Depends: + ${misc:Depends}, + libgandiva-dev (= ${binary:Version}), + libarrow-glib-dev (= ${binary:Version}), + libgandiva-glib13 (= ${binary:Version}), + gir1.2-gandiva-1.0 (= ${binary:Version}) +Description: Gandiva is a toolset for compiling and evaluating expressions + on Arrow Data. + . + This package provides GLib based header files. + +Package: libgandiva-glib-doc +Section: doc +Architecture: all +Multi-Arch: foreign +Depends: + ${misc:Depends} +Recommends: libglib2.0-doc +Description: Gandiva is a toolset for compiling and evaluating expressions + on Arrow Data. + . + This package provides documentations. + +Package: libplasma-glib13 +Section: libs +Architecture: any +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libarrow-cuda-glib13 (= ${binary:Version}), + libplasma13 (= ${binary:Version}) +Description: Plasma is an in-memory object store and cache for big data. + . + This package provides GLib based library files to connect plasma_store_server. + +Package: gir1.2-plasma-1.0 +Section: introspection +Architecture: any +Multi-Arch: same +Depends: + ${gir:Depends}, + ${misc:Depends} +Description: Plasma is an in-memory object store and cache for big data. + . + This package provides GObject Introspection typelib files. + +Package: libplasma-glib-dev +Section: libdevel +Architecture: any +Multi-Arch: same +Depends: + ${misc:Depends}, + libplasma-dev (= ${binary:Version}), + libarrow-cuda-glib-dev (= ${binary:Version}), + libplasma-glib13 (= ${binary:Version}), + gir1.2-plasma-1.0 (= ${binary:Version}) +Description: Plasma is an in-memory object store and cache for big data. + . + This package provides GLib based header files. + +Package: libplasma-glib-doc +Section: doc +Architecture: all +Multi-Arch: foreign +Depends: + ${misc:Depends} +Recommends: libglib2.0-doc +Description: Plasma is an in-memory object store and cache for big data. + . + This package provides documentations. 
-Package: libparquet-glib12 +Package: libparquet-glib13 Section: libs Architecture: any Multi-Arch: same @@ -224,8 +392,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib12 (= ${binary:Version}), - libparquet12 (= ${binary:Version}) + libarrow-glib13 (= ${binary:Version}), + libparquet13 (= ${binary:Version}) Description: Apache Parquet is a columnar storage format . This package provides GLib based library files. @@ -249,7 +417,7 @@ Depends: ${misc:Depends}, libarrow-glib-dev (= ${binary:Version}), libparquet-dev (= ${binary:Version}), - libparquet-glib12 (= ${binary:Version}), + libparquet-glib13 (= ${binary:Version}), gir1.2-parquet-1.0 (= ${binary:Version}) Suggests: libparquet-glib-doc Description: Apache Parquet is a columnar storage format diff --git a/dev/tasks/linux-packages/debian/gir1.2-arrow-cuda-1.0.install b/dev/tasks/linux-packages/debian/gir1.2-arrow-cuda-1.0.install new file mode 100644 index 0000000000000..ef0d9f56f9dbc --- /dev/null +++ b/dev/tasks/linux-packages/debian/gir1.2-arrow-cuda-1.0.install @@ -0,0 +1 @@ +usr/lib/*/girepository-1.0/ArrowCUDA-1.0.typelib diff --git a/dev/tasks/linux-packages/debian/gir1.2-arrow-gpu-1.0.install b/dev/tasks/linux-packages/debian/gir1.2-arrow-gpu-1.0.install deleted file mode 100644 index 10e0ca983be1a..0000000000000 --- a/dev/tasks/linux-packages/debian/gir1.2-arrow-gpu-1.0.install +++ /dev/null @@ -1 +0,0 @@ -usr/lib/*/girepository-1.0/ArrowGPU-1.0.typelib diff --git a/dev/tasks/linux-packages/debian/gir1.2-gandiva-1.0.install b/dev/tasks/linux-packages/debian/gir1.2-gandiva-1.0.install new file mode 100644 index 0000000000000..0433b367a24c8 --- /dev/null +++ b/dev/tasks/linux-packages/debian/gir1.2-gandiva-1.0.install @@ -0,0 +1 @@ +usr/lib/*/girepository-1.0/Gandiva-1.0.typelib diff --git a/dev/tasks/linux-packages/debian/gir1.2-plasma-1.0.install b/dev/tasks/linux-packages/debian/gir1.2-plasma-1.0.install new file mode 100644 index 0000000000000..7b7ce21581dfd --- /dev/null +++ b/dev/tasks/linux-packages/debian/gir1.2-plasma-1.0.install @@ -0,0 +1 @@ +usr/lib/*/girepository-1.0/Plasma-1.0.typelib diff --git a/dev/tasks/linux-packages/debian/libarrow-cuda-dev.install b/dev/tasks/linux-packages/debian/libarrow-cuda-dev.install new file mode 100644 index 0000000000000..2270d9258668d --- /dev/null +++ b/dev/tasks/linux-packages/debian/libarrow-cuda-dev.install @@ -0,0 +1,3 @@ +usr/lib/*/libarrow_cuda.a +usr/lib/*/libarrow_cuda.so +usr/lib/*/pkgconfig/arrow-cuda.pc diff --git a/dev/tasks/linux-packages/debian/libarrow-cuda-glib-dev.install b/dev/tasks/linux-packages/debian/libarrow-cuda-glib-dev.install new file mode 100644 index 0000000000000..7025fd202850e --- /dev/null +++ b/dev/tasks/linux-packages/debian/libarrow-cuda-glib-dev.install @@ -0,0 +1,5 @@ +usr/include/arrow-cuda-glib/ +usr/lib/*/libarrow-cuda-glib.a +usr/lib/*/libarrow-cuda-glib.so +usr/lib/*/pkgconfig/arrow-cuda-glib.pc +usr/share/gir-1.0/ArrowCUDA-1.0.gir diff --git a/dev/tasks/linux-packages/debian/libarrow-cuda-glib13.install b/dev/tasks/linux-packages/debian/libarrow-cuda-glib13.install new file mode 100644 index 0000000000000..a6d6375268d34 --- /dev/null +++ b/dev/tasks/linux-packages/debian/libarrow-cuda-glib13.install @@ -0,0 +1 @@ +usr/lib/*/libarrow-cuda-glib.so.* diff --git a/dev/tasks/linux-packages/debian/libarrow-cuda13.install b/dev/tasks/linux-packages/debian/libarrow-cuda13.install new file mode 100644 index 0000000000000..5ae46468764f2 --- /dev/null +++ 
b/dev/tasks/linux-packages/debian/libarrow-cuda13.install @@ -0,0 +1 @@ +usr/lib/*/libarrow_cuda.so.* diff --git a/dev/tasks/linux-packages/debian/libarrow-glib12.install b/dev/tasks/linux-packages/debian/libarrow-glib13.install similarity index 100% rename from dev/tasks/linux-packages/debian/libarrow-glib12.install rename to dev/tasks/linux-packages/debian/libarrow-glib13.install diff --git a/dev/tasks/linux-packages/debian/libarrow-gpu-dev.install b/dev/tasks/linux-packages/debian/libarrow-gpu-dev.install deleted file mode 100644 index 1892fb851535c..0000000000000 --- a/dev/tasks/linux-packages/debian/libarrow-gpu-dev.install +++ /dev/null @@ -1,3 +0,0 @@ -usr/lib/*/libarrow_gpu.a -usr/lib/*/libarrow_gpu.so -usr/lib/*/pkgconfig/arrow-gpu.pc diff --git a/dev/tasks/linux-packages/debian/libarrow-gpu-glib-dev.install b/dev/tasks/linux-packages/debian/libarrow-gpu-glib-dev.install deleted file mode 100644 index 9b3ef8fb25b35..0000000000000 --- a/dev/tasks/linux-packages/debian/libarrow-gpu-glib-dev.install +++ /dev/null @@ -1,5 +0,0 @@ -usr/include/arrow-gpu-glib/ -usr/lib/*/libarrow-gpu-glib.a -usr/lib/*/libarrow-gpu-glib.so -usr/lib/*/pkgconfig/arrow-gpu-glib.pc -usr/share/gir-1.0/ArrowGPU-1.0.gir diff --git a/dev/tasks/linux-packages/debian/libarrow-gpu-glib12.install b/dev/tasks/linux-packages/debian/libarrow-gpu-glib12.install deleted file mode 100644 index 4d97e5a60eb09..0000000000000 --- a/dev/tasks/linux-packages/debian/libarrow-gpu-glib12.install +++ /dev/null @@ -1 +0,0 @@ -usr/lib/*/libarrow-gpu-glib.so.* diff --git a/dev/tasks/linux-packages/debian/libarrow-gpu12.install b/dev/tasks/linux-packages/debian/libarrow-gpu12.install deleted file mode 100644 index cabd7e47d1e9a..0000000000000 --- a/dev/tasks/linux-packages/debian/libarrow-gpu12.install +++ /dev/null @@ -1 +0,0 @@ -usr/lib/*/libarrow_gpu.so.* diff --git a/dev/tasks/linux-packages/debian/libarrow-python12.install b/dev/tasks/linux-packages/debian/libarrow-python13.install similarity index 100% rename from dev/tasks/linux-packages/debian/libarrow-python12.install rename to dev/tasks/linux-packages/debian/libarrow-python13.install diff --git a/dev/tasks/linux-packages/debian/libarrow12.install b/dev/tasks/linux-packages/debian/libarrow13.install similarity index 100% rename from dev/tasks/linux-packages/debian/libarrow12.install rename to dev/tasks/linux-packages/debian/libarrow13.install diff --git a/dev/tasks/linux-packages/debian/libgandiva-dev.install b/dev/tasks/linux-packages/debian/libgandiva-dev.install new file mode 100644 index 0000000000000..1e5d264378e69 --- /dev/null +++ b/dev/tasks/linux-packages/debian/libgandiva-dev.install @@ -0,0 +1,3 @@ +usr/lib/*/libgandiva.a +usr/lib/*/libgandiva.so +usr/lib/*/pkgconfig/gandiva.pc diff --git a/dev/tasks/linux-packages/debian/libgandiva-glib-dev.install b/dev/tasks/linux-packages/debian/libgandiva-glib-dev.install new file mode 100644 index 0000000000000..4189dac66ed90 --- /dev/null +++ b/dev/tasks/linux-packages/debian/libgandiva-glib-dev.install @@ -0,0 +1,5 @@ +usr/include/gandiva-glib/ +usr/lib/*/libgandiva-glib.a +usr/lib/*/libgandiva-glib.so +usr/lib/*/pkgconfig/gandiva-glib.pc +usr/share/gir-1.0/Gandiva-1.0.gir diff --git a/dev/tasks/linux-packages/debian/libgandiva-glib-doc.doc-base b/dev/tasks/linux-packages/debian/libgandiva-glib-doc.doc-base new file mode 100644 index 0000000000000..bed6a124c5e08 --- /dev/null +++ b/dev/tasks/linux-packages/debian/libgandiva-glib-doc.doc-base @@ -0,0 +1,9 @@ +Document: gandiva-glib +Title: Gandiva GLib Reference Manual 
+Author: The Apache Software Foundation +Abstract: Gandiva GLib is a toolset for compiling and evaluating expressions on Arrow Data that uses GLib. +Section: Programming + +Format: HTML +Index: /usr/share/doc/libarrow-glib-doc/gandiva-glib/index.html +Files: /usr/share/doc/libarrow-glib-doc/gandiva-glib/*.html diff --git a/dev/tasks/linux-packages/debian/libgandiva-glib-doc.install b/dev/tasks/linux-packages/debian/libgandiva-glib-doc.install new file mode 100644 index 0000000000000..54d2d066c275a --- /dev/null +++ b/dev/tasks/linux-packages/debian/libgandiva-glib-doc.install @@ -0,0 +1 @@ +usr/share/doc/libarrow-glib-doc/gandiva-glib/ diff --git a/dev/tasks/linux-packages/debian/libgandiva-glib-doc.links b/dev/tasks/linux-packages/debian/libgandiva-glib-doc.links new file mode 100644 index 0000000000000..291b004ed717a --- /dev/null +++ b/dev/tasks/linux-packages/debian/libgandiva-glib-doc.links @@ -0,0 +1,3 @@ +usr/share/doc/libglib2.0-doc/glib usr/share/doc/libgandiva-glib-doc/glib +usr/share/doc/libglib2.0-doc/gobject usr/share/doc/libgandiva-glib-doc/gobject +usr/share/doc/libarrow-glib-doc/gandiva-glib usr/share/gtk-doc/html/gandiva-glib diff --git a/dev/tasks/linux-packages/debian/libgandiva-glib13.install b/dev/tasks/linux-packages/debian/libgandiva-glib13.install new file mode 100644 index 0000000000000..6257fd43823c0 --- /dev/null +++ b/dev/tasks/linux-packages/debian/libgandiva-glib13.install @@ -0,0 +1 @@ +usr/lib/*/libgandiva-glib.so.* diff --git a/dev/tasks/linux-packages/debian/libgandiva13.install b/dev/tasks/linux-packages/debian/libgandiva13.install new file mode 100644 index 0000000000000..38a05876db6e6 --- /dev/null +++ b/dev/tasks/linux-packages/debian/libgandiva13.install @@ -0,0 +1,2 @@ +usr/lib/*/libgandiva.so.* +usr/lib/*/gandiva/ diff --git a/dev/tasks/linux-packages/debian/libparquet-glib12.install b/dev/tasks/linux-packages/debian/libparquet-glib13.install similarity index 100% rename from dev/tasks/linux-packages/debian/libparquet-glib12.install rename to dev/tasks/linux-packages/debian/libparquet-glib13.install diff --git a/dev/tasks/linux-packages/debian/libparquet12.install b/dev/tasks/linux-packages/debian/libparquet13.install similarity index 100% rename from dev/tasks/linux-packages/debian/libparquet12.install rename to dev/tasks/linux-packages/debian/libparquet13.install diff --git a/dev/tasks/linux-packages/debian/libplasma-dev.install b/dev/tasks/linux-packages/debian/libplasma-dev.install new file mode 100644 index 0000000000000..d3538d2210af3 --- /dev/null +++ b/dev/tasks/linux-packages/debian/libplasma-dev.install @@ -0,0 +1,3 @@ +usr/lib/*/libplasma.a +usr/lib/*/libplasma.so +usr/lib/*/pkgconfig/plasma.pc diff --git a/dev/tasks/linux-packages/debian/libplasma-glib-dev.install b/dev/tasks/linux-packages/debian/libplasma-glib-dev.install new file mode 100644 index 0000000000000..f21a9aa8a8f9c --- /dev/null +++ b/dev/tasks/linux-packages/debian/libplasma-glib-dev.install @@ -0,0 +1,5 @@ +usr/include/plasma-glib/ +usr/lib/*/libplasma-glib.a +usr/lib/*/libplasma-glib.so +usr/lib/*/pkgconfig/plasma-glib.pc +usr/share/gir-1.0/Plasma-1.0.gir diff --git a/dev/tasks/linux-packages/debian/libplasma-glib-doc.doc-base b/dev/tasks/linux-packages/debian/libplasma-glib-doc.doc-base new file mode 100644 index 0000000000000..7863d7d07a36c --- /dev/null +++ b/dev/tasks/linux-packages/debian/libplasma-glib-doc.doc-base @@ -0,0 +1,9 @@ +Document: plasma-glib +Title: Plasma GLib Reference Manual +Author: The Apache Software Foundation +Abstract: Plasma GLib is an 
in-memory object store and cache for big data that uses GLib. +Section: Programming + +Format: HTML +Index: /usr/share/doc/libarrow-glib-doc/plasma-glib/index.html +Files: /usr/share/doc/libarrow-glib-doc/plasma-glib/*.html diff --git a/dev/tasks/linux-packages/debian/libplasma-glib-doc.install b/dev/tasks/linux-packages/debian/libplasma-glib-doc.install new file mode 100644 index 0000000000000..ef5a63b340c4e --- /dev/null +++ b/dev/tasks/linux-packages/debian/libplasma-glib-doc.install @@ -0,0 +1 @@ +usr/share/doc/libarrow-glib-doc/plasma-glib/ diff --git a/dev/tasks/linux-packages/debian/libplasma-glib-doc.links b/dev/tasks/linux-packages/debian/libplasma-glib-doc.links new file mode 100644 index 0000000000000..baea0ef4f4b78 --- /dev/null +++ b/dev/tasks/linux-packages/debian/libplasma-glib-doc.links @@ -0,0 +1,3 @@ +usr/share/doc/libglib2.0-doc/glib usr/share/doc/libplasma-glib-doc/glib +usr/share/doc/libglib2.0-doc/gobject usr/share/doc/libplasma-glib-doc/gobject +usr/share/doc/libarrow-glib-doc/plasma-glib usr/share/gtk-doc/html/plasma-glib diff --git a/dev/tasks/linux-packages/debian/libplasma-glib13.install b/dev/tasks/linux-packages/debian/libplasma-glib13.install new file mode 100644 index 0000000000000..339bcca3e7278 --- /dev/null +++ b/dev/tasks/linux-packages/debian/libplasma-glib13.install @@ -0,0 +1 @@ +usr/lib/*/libplasma-glib.so.* diff --git a/dev/tasks/linux-packages/debian/libplasma13.install b/dev/tasks/linux-packages/debian/libplasma13.install new file mode 100644 index 0000000000000..f8a744b65975d --- /dev/null +++ b/dev/tasks/linux-packages/debian/libplasma13.install @@ -0,0 +1 @@ +usr/lib/*/libplasma.so.* diff --git a/dev/tasks/linux-packages/debian/plasma-store-server.install b/dev/tasks/linux-packages/debian/plasma-store-server.install new file mode 100644 index 0000000000000..9c38179c17dc1 --- /dev/null +++ b/dev/tasks/linux-packages/debian/plasma-store-server.install @@ -0,0 +1 @@ +usr/bin/plasma_store_server diff --git a/dev/tasks/linux-packages/debian/rules b/dev/tasks/linux-packages/debian/rules index ce39fde6ebd23..d82f306cd2656 100755 --- a/dev/tasks/linux-packages/debian/rules +++ b/dev/tasks/linux-packages/debian/rules @@ -24,16 +24,18 @@ override_dh_auto_configure: --builddirectory=cpp_build \ -- \ -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \ - -DARROW_BUILD_TESTS=OFF \ -DARROW_PYTHON=ON \ -DARROW_BOOST_USE_SHARED=ON \ -DARROW_ORC=ON \ -DARROW_PARQUET=ON \ + -DARROW_PLASMA=ON \ + -DARROW_GANDIVA=ON \ + -DARROW_GANDIVA_JAVA=OFF \ -DPROTOBUF_HOME=/usr \ -DARROW_PROTOBUF_USE_SHARED=ON \ -DPythonInterp_FIND_VERSION=ON \ -DPythonInterp_FIND_VERSION_MAJOR=3 \ - -DARROW_GPU=ON + -DARROW_CUDA=ON dh_auto_configure \ --sourcedirectory=c_glib \ --builddirectory=c_glib_build \ diff --git a/dev/tasks/linux-packages/yum/arrow.spec.in b/dev/tasks/linux-packages/yum/arrow.spec.in index 9db9d43e1e66e..9391ea583f338 100644 --- a/dev/tasks/linux-packages/yum/arrow.spec.in +++ b/dev/tasks/linux-packages/yum/arrow.spec.in @@ -75,7 +75,7 @@ cd cpp/build %if %{use_parquet} -DARROW_PARQUET=ON \ %endif - -DARROW_BUILD_TESTS=OFF + -DARROW_PLASMA=ON make %{?_smp_mflags} cd - @@ -88,8 +88,11 @@ cd c_glib --enable-gtk-doc sed -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|g' libtool sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' libtool -LD_LIBRARY_PATH=$PWD/arrow-glib/.libs/:$PWD/parquet-glib/.libs/:$PWD/../cpp/build/$build_type \ - make %{?_smp_mflags} +ld_library_path=$PWD/arrow-glib/.libs/ +ld_library_path=$ld_library_path:$PWD/plasma-glib/.libs/ 
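+# For illustration: the accumulated form below is equivalent to the old
+# single-line invocation, now with plasma-glib included, i.e.
+#   LD_LIBRARY_PATH=$PWD/arrow-glib/.libs/:$PWD/plasma-glib/.libs/:$PWD/parquet-glib/.libs/:$PWD/../cpp/build/$build_type make %{?_smp_mflags}
+# but it stays readable as more GLib bindings are appended.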
+ld_library_path=$ld_library_path:$PWD/parquet-glib/.libs/ +ld_library_path=$ld_library_path:$PWD/../cpp/build/$build_type +LD_LIBRARY_PATH=$ld_library_path make %{?_smp_mflags} cd - %endif @@ -172,6 +175,48 @@ Libraries and header files for Apache Arrow CPython extensions. %{_libdir}/pkgconfig/arrow-python.pc %endif +%package -n plasma-libs +Summary: Runtime libraries for Plasma in-memory object store +License: Apache-2.0 +Requires: %{name}-libs = %{version}-%{release} + +%description -n plasma-libs +This package contains the libraries for Plasma in-memory object store. + +%files -n plasma-libs +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_libdir}/libplasma.so.* + +%package -n plasma-store-server +Summary: Server for Plasma in-memory object store +License: Apache-2.0 +Requires: plasma-libs = %{version}-%{release} + +%description -n plasma-store-server +This package contains the server for Plasma in-memory object store. + +%files -n plasma-store-server +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_bindir}/plasma_store_server + +%package -n plasma-devel +Summary: Libraries and header files for Plasma in-memory object store +License: Apache-2.0 +Requires: plasma-libs = %{version}-%{release} + +%description -n plasma-devel +Libraries and header files for Plasma in-memory object store. + +%files -n plasma-devel +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_includedir}/plasma/ +%{_libdir}/libplasma.a +%{_libdir}/libplasma.so +%{_libdir}/pkgconfig/plasma*.pc + %if %{use_parquet} %package -n parquet-libs Summary: Runtime libraries for Apache Parquet C++ @@ -183,7 +228,7 @@ Requires: boost-regex Requires: %{name}-libs = %{version}-%{release} %description -n parquet-libs -This package contains the libraries for Apache Parquet +This package contains the libraries for Apache Parquet C++. %files -n parquet-libs %defattr(-,root,root,-) @@ -197,7 +242,7 @@ Requires: parquet-libs = %{version}-%{release} Requires: zlib-devel %description -n parquet-devel -Libraries and header files for Apache Parquet. +Libraries and header files for Apache Parquet C++. %files -n parquet-devel %defattr(-,root,root,-) @@ -257,6 +302,51 @@ Documentation for Apache Arrow GLib. %doc README.md LICENSE.txt NOTICE.txt %{_docdir}/arrow-glib/ %{_datadir}/gtk-doc/html/arrow-glib/ + +%package -n plasma-glib-libs +Summary: Runtime libraries for Plasma GLib +License: Apache-2.0 +Requires: plasma-libs = %{version}-%{release} +Requires: %{name}-glib-libs = %{version}-%{release} + +%description -n plasma-glib-libs +This package contains the libraries for Plasma GLib. + +%files -n plasma-glib-libs +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_libdir}/libplasma-glib.so.* +%{_datadir}/gir-1.0/Plasma-1.0.gir + +%package -n plasma-glib-devel +Summary: Libraries and header files for Plasma GLib +License: Apache-2.0 +Requires: plasma-devel = %{version}-%{release} +Requires: %{name}-glib-devel = %{version}-%{release} + +%description -n plasma-glib-devel +Libraries and header files for Plasma GLib. + +%files -n plasma-glib-devel +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_includedir}/plasma-glib/ +%{_libdir}/libplasma-glib.a +%{_libdir}/libplasma-glib.so +%{_libdir}/pkgconfig/plasma-glib.pc +%{_libdir}/girepository-1.0/Plasma-1.0.typelib + +%package -n plasma-glib-doc +Summary: Documentation for Plasma GLib +License: Apache-2.0 + +%description -n plasma-glib-doc +Documentation for Plasma GLib. 
+ +%files -n plasma-glib-doc +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_datadir}/gtk-doc/html/plasma-glib/ %endif %if %{use_parquet} && %{use_glib} @@ -307,6 +397,9 @@ Documentation for Apache Parquet GLib. %endif %changelog +* Wed Jan 16 2019 Krisztián Szűcs - 0.12.0-1 +- New upstream release. + * Thu Oct 04 2018 Kouhei Sutou - 0.11.0-1 - New upstream release. diff --git a/dev/tasks/linux-packages/yum/centos-6/Dockerfile b/dev/tasks/linux-packages/yum/centos-6/Dockerfile index 8143b99efd180..c7de92296767a 100644 --- a/dev/tasks/linux-packages/yum/centos-6/Dockerfile +++ b/dev/tasks/linux-packages/yum/centos-6/Dockerfile @@ -20,14 +20,13 @@ FROM centos:6 ARG DEBUG ENV \ - SRPM_DOWNLOAD_URL=http://vault.centos.org/7.4.1708/os/Source/SPackages \ + SRPM_DOWNLOAD_URL=http://vault.centos.org/7.6.1810/os/Source/SPackages \ LIBARCHIVE_SRPM_BASE=libarchive-3.1.2-10.el7_2.src.rpm RUN \ quiet=$([ "${DEBUG}" = "yes" ] || echo "--quiet") && \ yum update -y ${quiet} && \ yum install -y ${quiet} \ - centos-release-scl \ epel-release && \ yum install -y \ autoconf268 \ @@ -43,9 +42,10 @@ RUN \ ~/rpmbuild/SPECS/libarchive.spec && \ yum install -y ${quiet} ~/rpmbuild/RPMS/*/libarchive-3.*.rpm && \ rm -rf ${LIBARCHIVE_SRPM_BASE} ~/rpmbuild/ && \ + yum install -y ${quiet} \ + centos-release-scl && \ yum install -y ${quiet} \ boost-devel \ - centos-release-scl \ cmake3 \ devtoolset-6 \ git \ diff --git a/dev/tasks/python-wheels/appveyor.yml b/dev/tasks/python-wheels/appveyor.yml index 016041a6c6701..be6ad302e1a5c 100644 --- a/dev/tasks/python-wheels/appveyor.yml +++ b/dev/tasks/python-wheels/appveyor.yml @@ -20,11 +20,11 @@ os: Visual Studio 2015 environment: ARCH: "64" GENERATOR: Visual Studio 14 2015 Win64 - NUMPY: "{{ numpy_version }}" + NUMPY: "1.14.5" PYTHON: "{{ python_version }}" MSVC_DEFAULT_OPTIONS: ON ARROW_SRC: C:\apache-arrow - PYARROW_VERSION: {{ arrow.version }} + PYARROW_VERSION: {{ arrow.no_rc_version }} PYARROW_REF: {{ arrow.head }} init: diff --git a/dev/tasks/python-wheels/linux-test.sh b/dev/tasks/python-wheels/linux-test.sh index 163730a9f38da..45efdb0c91e05 100755 --- a/dev/tasks/python-wheels/linux-test.sh +++ b/dev/tasks/python-wheels/linux-test.sh @@ -24,11 +24,18 @@ pip install /arrow/python/manylinux1/dist/*.whl python --version # Test optional dependencies -python -c "import pyarrow" -python -c "import pyarrow.orc" -python -c "import pyarrow.parquet" -python -c "import pyarrow.plasma" +command=" +import sys +import pyarrow +import pyarrow.orc +import pyarrow.parquet +import pyarrow.plasma + +if sys.version_info.major > 2: + import pyarrow.gandiva +" +python -c "$command" # Run pyarrow tests -pip install pytest pandas +pip install -r /arrow/python/requirements-test.txt pytest --pyargs pyarrow diff --git a/dev/tasks/python-wheels/osx-build.sh b/dev/tasks/python-wheels/osx-build.sh index 5c69904ff4348..22c44c157337f 100755 --- a/dev/tasks/python-wheels/osx-build.sh +++ b/dev/tasks/python-wheels/osx-build.sh @@ -99,9 +99,8 @@ function build_wheel { # build will also work with newer NumPy versions. 
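# A sketch of the new flow (the exact pins live in python/requirements-wheel.txt,
# not shown here):
#   pip install -r python/requirements-wheel.txt cython
# This replaces the per-job $BUILD_DEPENDS / $TEST_DEPENDS exports that
# travis.osx.yml carried before this change.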
export ARROW_HOME=`pwd`/arrow-dist export PARQUET_HOME=`pwd`/arrow-dist - if [ -n "$BUILD_DEPENDS" ]; then - pip install $(pip_opts) $BUILD_DEPENDS - fi + + pip install $(pip_opts) -r python/requirements-wheel.txt cython pushd cpp mkdir build @@ -161,10 +160,6 @@ function install_run { wheelhouse="$PWD/python/dist" - # Install test dependencies and built wheel - if [ -n "$TEST_DEPENDS" ]; then - pip install $(pip_opts) $TEST_DEPENDS - fi # Install compatible wheel pip install $(pip_opts) \ $(python $multibuild_dir/supported_wheels.py $wheelhouse/*.whl) @@ -179,7 +174,8 @@ function install_run { python -c "import pyarrow.plasma" # Run pyarrow tests - pip install pytest pytest-faulthandler + pip install $(pip_opts) -r python/requirements-test.txt + py.test --pyargs pyarrow popd diff --git a/dev/tasks/python-wheels/travis.linux.yml b/dev/tasks/python-wheels/travis.linux.yml index 9a8f804d1cc51..b5cbc65bc7e7e 100644 --- a/dev/tasks/python-wheels/travis.linux.yml +++ b/dev/tasks/python-wheels/travis.linux.yml @@ -40,9 +40,8 @@ script: # build wheel - pushd arrow/python/manylinux1 - docker run --shm-size=2g - -e SETUPTOOLS_SCM_PRETEND_VERSION={{ arrow.version }} + -e SETUPTOOLS_SCM_PRETEND_VERSION={{ arrow.no_rc_version }} -e PYTHON_VERSIONS="{{ python_version }},{{ unicode_width }}" - -e WHEEL_VERSION={{ wheel_version }} -v $PWD:/io -v $PWD/../../:/arrow quay.io/xhochy/arrow_manylinux1_x86_64_base:latest /io/build_arrow.sh diff --git a/dev/tasks/python-wheels/travis.osx.yml b/dev/tasks/python-wheels/travis.osx.yml index 2f0d168a3fb46..a98841335e728 100644 --- a/dev/tasks/python-wheels/travis.osx.yml +++ b/dev/tasks/python-wheels/travis.osx.yml @@ -26,11 +26,9 @@ env: - PLAT=x86_64 - TRAVIS_TAG={{ task.tag }} - MACOSX_DEPLOYMENT_TARGET="10.9" - - PYARROW_VERSION={{ arrow.version }} + - PYARROW_VERSION={{ arrow.no_rc_version }} - PYARROW_BUILD_VERBOSE=1 - MB_PYTHON_VERSION={{ python_version }} - - BUILD_DEPENDS="wheel=={{ wheel_version }} numpy=={{ numpy_version }} cython==0.27.3 six" - - TEST_DEPENDS="numpy=={{ numpy_version }} pandas=={{ pandas_version }} six" before_install: - git clone https://github.com/matthew-brett/multibuild # TODO pin it diff --git a/dev/tasks/python-wheels/win-build.bat b/dev/tasks/python-wheels/win-build.bat index 22e306ab1f1eb..f85c8e8b7490e 100644 --- a/dev/tasks/python-wheels/win-build.bat +++ b/dev/tasks/python-wheels/win-build.bat @@ -82,7 +82,7 @@ popd @rem test the wheel call deactivate conda create -n wheel-test -q -y python=%PYTHON% ^ - numpy=%NUMPY% pandas pytest + numpy=%NUMPY% pandas pytest hypothesis call activate wheel-test pip install --no-index --find-links=%ARROW_SRC%\python\dist\ pyarrow diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 514942df93e1a..751420c3e6094 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -19,14 +19,14 @@ groups: # these groups are just for convenience # makes it easier to submit related tasks conda: - - conda-linux-py27 - - conda-linux-py35 - - conda-linux-py36 - - conda-osx-py27 - - conda-osx-py35 - - conda-osx-py36 - - conda-win-py35 - - conda-win-py36 + - conda-linux-gcc-py27 + - conda-linux-gcc-py36 + - conda-linux-gcc-py37 + - conda-osx-clang-py27 + - conda-osx-clang-py36 + - conda-osx-clang-py37 + - conda-win-vs2015-py36 + - conda-win-vs2015-py37 wheel: - wheel-linux-cp27m - wheel-linux-cp27mu @@ -60,85 +60,85 @@ tasks: # artifacts: list of regex patterns, each needs to match a single github # release asset, version variable is replaced in the pattern # e.g.: - # - 
pyarrow-{version}-py36(h[a-z0-9]+)_0-linux-64.tar.bz2 + # - pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0-linux-64.tar.bz2 ############################## Conda Linux ################################## - conda-linux-py27: + conda-linux-gcc-py27: platform: linux template: conda-recipes/travis.linux.yml params: - variant_config_file: variants/linux_python2.7.yaml + config: linux_c_compilergcccxx_compilergxxpython2.7 artifacts: - - arrow-cpp-{version}-py27(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{version}-py27(h[a-z0-9]+)_0.tar.bz2 + - arrow-cpp-{no_rc_version}-py27(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{no_rc_version}-py27(h[a-z0-9]+)_0.tar.bz2 - conda-linux-py35: + conda-linux-gcc-py36: platform: linux template: conda-recipes/travis.linux.yml params: - variant_config_file: variants/linux_python3.5.yaml + config: linux_c_compilergcccxx_compilergxxpython3.6 artifacts: - - arrow-cpp-{version}-py35(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{version}-py35(h[a-z0-9]+)_0.tar.bz2 + - arrow-cpp-{no_rc_version}-py36(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0.tar.bz2 - conda-linux-py36: + conda-linux-gcc-py37: platform: linux template: conda-recipes/travis.linux.yml params: - variant_config_file: variants/linux_python3.6.yaml + config: linux_c_compilergcccxx_compilergxxpython3.7 artifacts: - - arrow-cpp-{version}-py36(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{version}-py36(h[a-z0-9]+)_0.tar.bz2 + - arrow-cpp-{no_rc_version}-py37(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{no_rc_version}-py37(h[a-z0-9]+)_0.tar.bz2 ############################## Conda OSX #################################### - conda-osx-py27: + conda-osx-clang-py27: platform: osx template: conda-recipes/travis.osx.yml params: - variant_config_file: variants/osx_python2.7.yaml + config: osx_c_compilerclangcxx_compilerclangxxpython2.7 artifacts: - - arrow-cpp-{version}-py27(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{version}-py27(h[a-z0-9]+)_0.tar.bz2 + - arrow-cpp-{no_rc_version}-py27(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{no_rc_version}-py27(h[a-z0-9]+)_0.tar.bz2 - conda-osx-py35: + conda-osx-clang-py36: platform: osx template: conda-recipes/travis.osx.yml params: - variant_config_file: variants/osx_python3.5.yaml + config: osx_c_compilerclangcxx_compilerclangxxpython3.6 artifacts: - - arrow-cpp-{version}-py35(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{version}-py35(h[a-z0-9]+)_0.tar.bz2 + - arrow-cpp-{no_rc_version}-py36(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0.tar.bz2 - conda-osx-py36: + conda-osx-clang-py37: platform: osx template: conda-recipes/travis.osx.yml params: - variant_config_file: variants/osx_python3.6.yaml + config: osx_c_compilerclangcxx_compilerclangxxpython3.7 artifacts: - - arrow-cpp-{version}-py36(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{version}-py36(h[a-z0-9]+)_0.tar.bz2 + - arrow-cpp-{no_rc_version}-py37(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{no_rc_version}-py37(h[a-z0-9]+)_0.tar.bz2 ############################## Conda Windows ################################ - conda-win-py35: + conda-win-vs2015-py36: platform: win template: conda-recipes/appveyor.yml params: - variant_config_file: variants\win_c_compilervs2015cxx_compilervs2015python3.5.yaml + config: win_c_compilervs2015cxx_compilervs2015python3.6 artifacts: - - arrow-cpp-{version}-py35_vc14(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{version}-py35(h[a-z0-9]+)_0.tar.bz2 + - arrow-cpp-{no_rc_version}-py36_vc14(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0.tar.bz2 - conda-win-py36: + conda-win-vs2015-py37: platform: win template: conda-recipes/appveyor.yml 
params: - variant_config_file: variants\win_c_compilervs2015cxx_compilervs2015python3.6.yaml + config: win_c_compilervs2015cxx_compilervs2015python3.7 artifacts: - - arrow-cpp-{version}-py36_vc14(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{version}-py36(h[a-z0-9]+)_0.tar.bz2 + - arrow-cpp-{no_rc_version}-py37_vc14(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{no_rc_version}-py37(h[a-z0-9]+)_0.tar.bz2 ############################## Wheel Linux ################################## @@ -146,60 +146,55 @@ tasks: platform: linux template: python-wheels/travis.linux.yml params: - wheel_version: 0.31.1 python_version: 2.7 unicode_width: 16 test_docker_images: [] artifacts: - - pyarrow-{version}-cp27-cp27m-manylinux1_x86_64.whl + - pyarrow-{no_rc_version}-cp27-cp27m-manylinux1_x86_64.whl wheel-linux-cp27mu: platform: linux template: python-wheels/travis.linux.yml params: - wheel_version: 0.31.1 python_version: 2.7 unicode_width: 32 test_docker_images: - python:2.7-slim # debian ucs4 artifacts: - - pyarrow-{version}-cp27-cp27mu-manylinux1_x86_64.whl + - pyarrow-{no_rc_version}-cp27-cp27mu-manylinux1_x86_64.whl wheel-linux-cp35m: platform: linux template: python-wheels/travis.linux.yml params: - wheel_version: 0.31.1 python_version: 3.5 unicode_width: 16 test_docker_images: - python:3.5-slim artifacts: - - pyarrow-{version}-cp35-cp35m-manylinux1_x86_64.whl + - pyarrow-{no_rc_version}-cp35-cp35m-manylinux1_x86_64.whl wheel-linux-cp36m: platform: linux template: python-wheels/travis.linux.yml params: - wheel_version: 0.31.1 python_version: 3.6 unicode_width: 16 test_docker_images: - python:3.6-slim artifacts: - - pyarrow-{version}-cp36-cp36m-manylinux1_x86_64.whl + - pyarrow-{no_rc_version}-cp36-cp36m-manylinux1_x86_64.whl wheel-linux-cp37m: platform: linux template: python-wheels/travis.linux.yml params: - wheel_version: 0.31.1 python_version: 3.7 unicode_width: 16 test_docker_images: - python:3.7-slim artifacts: - - pyarrow-{version}-cp37-cp37m-manylinux1_x86_64.whl + - pyarrow-{no_rc_version}-cp37-cp37m-manylinux1_x86_64.whl ############################## Wheel OSX #################################### @@ -207,45 +202,33 @@ tasks: platform: osx template: python-wheels/travis.osx.yml params: - numpy_version: 1.14.5 - pandas_version: 0.23.0 python_version: 2.7 - wheel_version: 0.31.1 artifacts: - - pyarrow-{version}-cp27-cp27m-macosx_10_6_intel.whl + - pyarrow-{no_rc_version}-cp27-cp27m-macosx_10_6_intel.whl wheel-osx-cp35m: platform: osx template: python-wheels/travis.osx.yml params: - numpy_version: 1.14.5 - pandas_version: 0.23.0 python_version: 3.5 - wheel_version: 0.31.1 artifacts: - - pyarrow-{version}-cp35-cp35m-macosx_10_6_intel.whl + - pyarrow-{no_rc_version}-cp35-cp35m-macosx_10_6_intel.whl wheel-osx-cp36m: platform: osx template: python-wheels/travis.osx.yml params: - numpy_version: 1.14.5 - pandas_version: 0.23.0 python_version: 3.6 - wheel_version: 0.31.1 artifacts: - - pyarrow-{version}-cp36-cp36m-macosx_10_6_intel.whl + - pyarrow-{no_rc_version}-cp36-cp36m-macosx_10_6_intel.whl wheel-osx-cp37m: platform: osx template: python-wheels/travis.osx.yml params: - numpy_version: 1.14.5 - pandas_version: 0.23.0 python_version: 3.7 - wheel_version: 0.31.1 artifacts: - - pyarrow-{version}-cp37-cp37m-macosx_10_6_intel.whl + - pyarrow-{no_rc_version}-cp37-cp37m-macosx_10_6_intel.whl ############################## Wheel Windows ################################ @@ -253,28 +236,25 @@ tasks: platform: win template: python-wheels/appveyor.yml params: - numpy_version: 1.14.5 python_version: 3.5 artifacts: - - 
pyarrow-{version}-cp35-cp35m-win_amd64.whl + - pyarrow-{no_rc_version}-cp35-cp35m-win_amd64.whl wheel-win-cp36m: platform: win template: python-wheels/appveyor.yml params: - numpy_version: 1.14.5 python_version: 3.6 artifacts: - - pyarrow-{version}-cp36-cp36m-win_amd64.whl + - pyarrow-{no_rc_version}-cp36-cp36m-win_amd64.whl wheel-win-cp37m: platform: win template: python-wheels/appveyor.yml params: - numpy_version: 1.14.5 python_version: 3.7 artifacts: - - pyarrow-{version}-cp37-cp37m-win_amd64.whl + - pyarrow-{no_rc_version}-cp37-cp37m-win_amd64.whl ############################## Linux PKGS #################################### @@ -293,31 +273,47 @@ tasks: - apache-arrow_{no_rc_version}-1.dsc - apache-arrow_{no_rc_version}.orig.tar.gz - gir1.2-arrow-1.0_{no_rc_version}-1_amd64.deb - - gir1.2-arrow-gpu-1.0_{no_rc_version}-1_amd64.deb + - gir1.2-arrow-cuda-1.0_{no_rc_version}-1_amd64.deb + - gir1.2-gandiva-1.0_{no_rc_version}-1_amd64.deb - gir1.2-parquet-1.0_{no_rc_version}-1_amd64.deb + - gir1.2-plasma-1.0_{no_rc_version}-1_amd64.deb - libarrow-dev_{no_rc_version}-1_amd64.deb - libarrow-glib-dev_{no_rc_version}-1_amd64.deb - libarrow-glib-doc_{no_rc_version}-1_all.deb - - libarrow-glib12-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow-glib12_{no_rc_version}-1_amd64.deb - - libarrow-gpu-dev_{no_rc_version}-1_amd64.deb - - libarrow-gpu-glib-dev_{no_rc_version}-1_amd64.deb - - libarrow-gpu-glib12-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow-gpu-glib12_{no_rc_version}-1_amd64.deb - - libarrow-gpu12-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow-gpu12_{no_rc_version}-1_amd64.deb + - libarrow-glib13-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow-glib13_{no_rc_version}-1_amd64.deb + - libarrow-cuda-dev_{no_rc_version}-1_amd64.deb + - libarrow-cuda-glib-dev_{no_rc_version}-1_amd64.deb + - libarrow-cuda-glib13-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow-cuda-glib13_{no_rc_version}-1_amd64.deb + - libarrow-cuda13-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow-cuda13_{no_rc_version}-1_amd64.deb - libarrow-python-dev_{no_rc_version}-1_amd64.deb - - libarrow-python12-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow-python12_{no_rc_version}-1_amd64.deb - - libarrow12-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow12_{no_rc_version}-1_amd64.deb + - libarrow-python13-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow-python13_{no_rc_version}-1_amd64.deb + - libarrow13-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow13_{no_rc_version}-1_amd64.deb + - libgandiva-dev_{no_rc_version}-1_amd64.deb + - libgandiva-glib-dev_{no_rc_version}-1_amd64.deb + - libgandiva-glib-doc_{no_rc_version}-1_all.deb + - libgandiva-glib13-dbgsym_{no_rc_version}-1_amd64.deb + - libgandiva-glib13_{no_rc_version}-1_amd64.deb + - libgandiva13-dbgsym_{no_rc_version}-1_amd64.deb + - libgandiva13_{no_rc_version}-1_amd64.deb - libparquet-dev_{no_rc_version}-1_amd64.deb - libparquet-glib-dev_{no_rc_version}-1_amd64.deb - libparquet-glib-doc_{no_rc_version}-1_all.deb - - libparquet-glib12-dbgsym_{no_rc_version}-1_amd64.deb - - libparquet-glib12_{no_rc_version}-1_amd64.deb - - libparquet12-dbgsym_{no_rc_version}-1_amd64.deb - - libparquet12_{no_rc_version}-1_amd64.deb + - libparquet-glib13-dbgsym_{no_rc_version}-1_amd64.deb + - libparquet-glib13_{no_rc_version}-1_amd64.deb + - libparquet13-dbgsym_{no_rc_version}-1_amd64.deb + - libparquet13_{no_rc_version}-1_amd64.deb + - libplasma-dev_{no_rc_version}-1_amd64.deb + - libplasma-glib-dev_{no_rc_version}-1_amd64.deb + - libplasma-glib-doc_{no_rc_version}-1_all.deb + - 
libplasma-glib13-dbgsym_{no_rc_version}-1_amd64.deb + - libplasma-glib13_{no_rc_version}-1_amd64.deb + - libplasma13-dbgsym_{no_rc_version}-1_amd64.deb + - libplasma13_{no_rc_version}-1_amd64.deb ubuntu-trusty: platform: linux @@ -335,16 +331,22 @@ tasks: - apache-arrow_{no_rc_version}.orig.tar.gz - gir1.2-arrow-1.0_{no_rc_version}-1_amd64.deb - gir1.2-parquet-1.0_{no_rc_version}-1_amd64.deb + - gir1.2-plasma-1.0_{no_rc_version}-1_amd64.deb - libarrow-dev_{no_rc_version}-1_amd64.deb - libarrow-glib-dev_{no_rc_version}-1_amd64.deb - libarrow-glib-doc_{no_rc_version}-1_all.deb - - libarrow-glib12_{no_rc_version}-1_amd64.deb - - libarrow12_{no_rc_version}-1_amd64.deb + - libarrow-glib13_{no_rc_version}-1_amd64.deb + - libarrow13_{no_rc_version}-1_amd64.deb - libparquet-dev_{no_rc_version}-1_amd64.deb - libparquet-glib-dev_{no_rc_version}-1_amd64.deb - libparquet-glib-doc_{no_rc_version}-1_all.deb - - libparquet-glib12_{no_rc_version}-1_amd64.deb - - libparquet12_{no_rc_version}-1_amd64.deb + - libparquet-glib13_{no_rc_version}-1_amd64.deb + - libparquet13_{no_rc_version}-1_amd64.deb + - libplasma-dev_{no_rc_version}-1_amd64.deb + - libplasma-glib-dev_{no_rc_version}-1_amd64.deb + - libplasma-glib-doc_{no_rc_version}-1_all.deb + - libplasma-glib13_{no_rc_version}-1_amd64.deb + - libplasma13_{no_rc_version}-1_amd64.deb ubuntu-xenial: platform: linux @@ -361,24 +363,36 @@ tasks: - apache-arrow_{no_rc_version}-1.dsc - apache-arrow_{no_rc_version}.orig.tar.gz - gir1.2-arrow-1.0_{no_rc_version}-1_amd64.deb - - gir1.2-arrow-gpu-1.0_{no_rc_version}-1_amd64.deb + - gir1.2-arrow-cuda-1.0_{no_rc_version}-1_amd64.deb + - gir1.2-gandiva-1.0_{no_rc_version}-1_amd64.deb - gir1.2-parquet-1.0_{no_rc_version}-1_amd64.deb + - gir1.2-plasma-1.0_{no_rc_version}-1_amd64.deb - libarrow-dev_{no_rc_version}-1_amd64.deb - libarrow-glib-dev_{no_rc_version}-1_amd64.deb - libarrow-glib-doc_{no_rc_version}-1_all.deb - - libarrow-glib12_{no_rc_version}-1_amd64.deb - - libarrow-gpu-dev_{no_rc_version}-1_amd64.deb - - libarrow-gpu-glib-dev_{no_rc_version}-1_amd64.deb - - libarrow-gpu-glib12_{no_rc_version}-1_amd64.deb - - libarrow-gpu12_{no_rc_version}-1_amd64.deb + - libarrow-glib13_{no_rc_version}-1_amd64.deb + - libarrow-cuda-dev_{no_rc_version}-1_amd64.deb + - libarrow-cuda-glib-dev_{no_rc_version}-1_amd64.deb + - libarrow-cuda-glib13_{no_rc_version}-1_amd64.deb + - libarrow-cuda13_{no_rc_version}-1_amd64.deb - libarrow-python-dev_{no_rc_version}-1_amd64.deb - - libarrow-python12_{no_rc_version}-1_amd64.deb - - libarrow12_{no_rc_version}-1_amd64.deb + - libarrow-python13_{no_rc_version}-1_amd64.deb + - libarrow13_{no_rc_version}-1_amd64.deb + - libgandiva-dev_{no_rc_version}-1_amd64.deb + - libgandiva-glib-dev_{no_rc_version}-1_amd64.deb + - libgandiva-glib-doc_{no_rc_version}-1_all.deb + - libgandiva-glib13_{no_rc_version}-1_amd64.deb + - libgandiva13_{no_rc_version}-1_amd64.deb - libparquet-dev_{no_rc_version}-1_amd64.deb - libparquet-glib-dev_{no_rc_version}-1_amd64.deb - libparquet-glib-doc_{no_rc_version}-1_all.deb - - libparquet-glib12_{no_rc_version}-1_amd64.deb - - libparquet12_{no_rc_version}-1_amd64.deb + - libparquet-glib13_{no_rc_version}-1_amd64.deb + - libparquet13_{no_rc_version}-1_amd64.deb + - libplasma-dev_{no_rc_version}-1_amd64.deb + - libplasma-glib-dev_{no_rc_version}-1_amd64.deb + - libplasma-glib-doc_{no_rc_version}-1_all.deb + - libplasma-glib13_{no_rc_version}-1_amd64.deb + - libplasma13_{no_rc_version}-1_amd64.deb ubuntu-bionic: platform: linux @@ -395,24 +409,36 @@ tasks: - 
apache-arrow_{no_rc_version}-1.dsc - apache-arrow_{no_rc_version}.orig.tar.gz - gir1.2-arrow-1.0_{no_rc_version}-1_amd64.deb - - gir1.2-arrow-gpu-1.0_{no_rc_version}-1_amd64.deb + - gir1.2-arrow-cuda-1.0_{no_rc_version}-1_amd64.deb + - gir1.2-gandiva-1.0_{no_rc_version}-1_amd64.deb - gir1.2-parquet-1.0_{no_rc_version}-1_amd64.deb + - gir1.2-plasma-1.0_{no_rc_version}-1_amd64.deb - libarrow-dev_{no_rc_version}-1_amd64.deb - libarrow-glib-dev_{no_rc_version}-1_amd64.deb - libarrow-glib-doc_{no_rc_version}-1_all.deb - - libarrow-glib12_{no_rc_version}-1_amd64.deb - - libarrow-gpu-dev_{no_rc_version}-1_amd64.deb - - libarrow-gpu-glib-dev_{no_rc_version}-1_amd64.deb - - libarrow-gpu-glib12_{no_rc_version}-1_amd64.deb - - libarrow-gpu12_{no_rc_version}-1_amd64.deb + - libarrow-glib13_{no_rc_version}-1_amd64.deb + - libarrow-cuda-dev_{no_rc_version}-1_amd64.deb + - libarrow-cuda-glib-dev_{no_rc_version}-1_amd64.deb + - libarrow-cuda-glib13_{no_rc_version}-1_amd64.deb + - libarrow-cuda13_{no_rc_version}-1_amd64.deb - libarrow-python-dev_{no_rc_version}-1_amd64.deb - - libarrow-python12_{no_rc_version}-1_amd64.deb - - libarrow12_{no_rc_version}-1_amd64.deb + - libarrow-python13_{no_rc_version}-1_amd64.deb + - libarrow13_{no_rc_version}-1_amd64.deb + - libgandiva-dev_{no_rc_version}-1_amd64.deb + - libgandiva-glib-dev_{no_rc_version}-1_amd64.deb + - libgandiva-glib-doc_{no_rc_version}-1_all.deb + - libgandiva-glib13_{no_rc_version}-1_amd64.deb + - libgandiva13_{no_rc_version}-1_amd64.deb - libparquet-dev_{no_rc_version}-1_amd64.deb - libparquet-glib-dev_{no_rc_version}-1_amd64.deb - libparquet-glib-doc_{no_rc_version}-1_all.deb - - libparquet-glib12_{no_rc_version}-1_amd64.deb - - libparquet12_{no_rc_version}-1_amd64.deb + - libparquet-glib13_{no_rc_version}-1_amd64.deb + - libparquet13_{no_rc_version}-1_amd64.deb + - libplasma-dev_{no_rc_version}-1_amd64.deb + - libplasma-glib-dev_{no_rc_version}-1_amd64.deb + - libplasma-glib-doc_{no_rc_version}-1_all.deb + - libplasma-glib13_{no_rc_version}-1_amd64.deb + - libplasma13_{no_rc_version}-1_amd64.deb ubuntu-cosmic: platform: linux @@ -429,24 +455,36 @@ tasks: - apache-arrow_{no_rc_version}-1.dsc - apache-arrow_{no_rc_version}.orig.tar.gz - gir1.2-arrow-1.0_{no_rc_version}-1_amd64.deb - - gir1.2-arrow-gpu-1.0_{no_rc_version}-1_amd64.deb + - gir1.2-arrow-cuda-1.0_{no_rc_version}-1_amd64.deb + - gir1.2-gandiva-1.0_{no_rc_version}-1_amd64.deb - gir1.2-parquet-1.0_{no_rc_version}-1_amd64.deb + - gir1.2-plasma-1.0_{no_rc_version}-1_amd64.deb - libarrow-dev_{no_rc_version}-1_amd64.deb - libarrow-glib-dev_{no_rc_version}-1_amd64.deb - libarrow-glib-doc_{no_rc_version}-1_all.deb - - libarrow-glib12_{no_rc_version}-1_amd64.deb - - libarrow-gpu-dev_{no_rc_version}-1_amd64.deb - - libarrow-gpu-glib-dev_{no_rc_version}-1_amd64.deb - - libarrow-gpu-glib12_{no_rc_version}-1_amd64.deb - - libarrow-gpu12_{no_rc_version}-1_amd64.deb + - libarrow-glib13_{no_rc_version}-1_amd64.deb + - libarrow-cuda-dev_{no_rc_version}-1_amd64.deb + - libarrow-cuda-glib-dev_{no_rc_version}-1_amd64.deb + - libarrow-cuda-glib13_{no_rc_version}-1_amd64.deb + - libarrow-cuda13_{no_rc_version}-1_amd64.deb - libarrow-python-dev_{no_rc_version}-1_amd64.deb - - libarrow-python12_{no_rc_version}-1_amd64.deb - - libarrow12_{no_rc_version}-1_amd64.deb + - libarrow-python13_{no_rc_version}-1_amd64.deb + - libarrow13_{no_rc_version}-1_amd64.deb + - libgandiva-dev_{no_rc_version}-1_amd64.deb + - libgandiva-glib-dev_{no_rc_version}-1_amd64.deb + - libgandiva-glib-doc_{no_rc_version}-1_all.deb + - 
libgandiva-glib13_{no_rc_version}-1_amd64.deb + - libgandiva13_{no_rc_version}-1_amd64.deb - libparquet-dev_{no_rc_version}-1_amd64.deb - libparquet-glib-dev_{no_rc_version}-1_amd64.deb - libparquet-glib-doc_{no_rc_version}-1_all.deb - - libparquet-glib12_{no_rc_version}-1_amd64.deb - - libparquet12_{no_rc_version}-1_amd64.deb + - libparquet-glib13_{no_rc_version}-1_amd64.deb + - libparquet13_{no_rc_version}-1_amd64.deb + - libplasma-dev_{no_rc_version}-1_amd64.deb + - libplasma-glib-dev_{no_rc_version}-1_amd64.deb + - libplasma-glib-doc_{no_rc_version}-1_all.deb + - libplasma-glib13_{no_rc_version}-1_amd64.deb + - libplasma13_{no_rc_version}-1_amd64.deb centos-6: platform: linux @@ -462,6 +500,8 @@ tasks: - arrow-libs-{no_rc_version}-1.el6.x86_64.rpm - arrow-python-devel-{no_rc_version}-1.el6.x86_64.rpm - arrow-python-libs-{no_rc_version}-1.el6.x86_64.rpm + - plasma-devel-{no_rc_version}-1.el6.x86_64.rpm + - plasma-libs-{no_rc_version}-1.el6.x86_64.rpm centos-7: platform: linux @@ -485,6 +525,11 @@ tasks: - parquet-glib-doc-{no_rc_version}-1.el7.x86_64.rpm - parquet-glib-libs-{no_rc_version}-1.el7.x86_64.rpm - parquet-libs-{no_rc_version}-1.el7.x86_64.rpm + - plasma-devel-{no_rc_version}-1.el7.x86_64.rpm + - plasma-glib-devel-{no_rc_version}-1.el7.x86_64.rpm + - plasma-glib-doc-{no_rc_version}-1.el7.x86_64.rpm + - plasma-glib-libs-{no_rc_version}-1.el7.x86_64.rpm + - plasma-libs-{no_rc_version}-1.el7.x86_64.rpm ############################## Gandiva Tasks ################################ @@ -492,10 +537,10 @@ tasks: platform: linux template: gandiva-jars/travis.linux.yml artifacts: - - arrow-gandiva-{version}-SNAPSHOT.jar + - arrow-gandiva-{no_rc_version}-SNAPSHOT.jar gandiva-jar-osx: platform: osx template: gandiva-jars/travis.osx.yml artifacts: - - arrow-gandiva-{version}-SNAPSHOT.jar + - arrow-gandiva-{no_rc_version}-SNAPSHOT.jar diff --git a/dev/tasks/tests.yml b/dev/tasks/tests.yml index 2365455a8a7cb..32131abd9f2a1 100644 --- a/dev/tasks/tests.yml +++ b/dev/tasks/tests.yml @@ -19,20 +19,44 @@ groups: # these groups are just for convenience # makes it easier to submit related tasks docker: + - docker-r - docker-rust - docker-cpp + - docker-cpp-alpine + - docker-cpp-cmake32 - docker-c_glib - docker-go - docker-python-2.7 - docker-python-3.6 - docker-python-3.7 + - docker-python-2.7-alpine + - docker-python-3.6-alpine - docker-java - docker-js + - docker-docs - docker-lint - docker-iwyu - docker-clang-format + - docker-pandas-master + - docker-dask-integration - docker-hdfs-integration + - docker-spark-integration + + integration: - docker-pandas-master + - docker-dask-integration + - docker-hdfs-integration + - docker-spark-integration + + cpp-python: + - docker-cpp + - docker-cpp-alpine + - docker-cpp-cmake32 + - docker-python-2.7 + - docker-python-2.7-alpine + - docker-python-3.6 + - docker-python-3.6-alpine + - docker-python-3.7 tasks: # arbitrary_task_name: @@ -46,6 +70,15 @@ tasks: ############################## Language containers ######################### + docker-r: + platform: linux + template: docker-tests/travis.linux.yml + params: + commands: + - docker-compose build cpp + - docker-compose build r + - docker-compose run r + docker-rust: platform: linux template: docker-tests/travis.linux.yml @@ -62,6 +95,22 @@ tasks: - docker-compose build cpp - docker-compose run cpp + docker-cpp-alpine: + platform: linux + template: docker-tests/travis.linux.yml + params: + commands: + - docker-compose build cpp-alpine + - docker-compose run cpp-alpine + + docker-cpp-cmake32: + platform: 
linux + template: docker-tests/travis.linux.yml + params: + commands: + - docker-compose build cpp-cmake32 + - docker-compose run cpp-cmake32 + docker-c_glib: platform: linux template: docker-tests/travis.linux.yml @@ -128,6 +177,42 @@ tasks: - docker-compose build python - docker-compose run python + docker-python-2.7-alpine: + platform: linux + template: docker-tests/travis.linux.yml + params: + environment: + PYTHON_VERSION: 2.7 + commands: + - docker-compose build cpp-alpine + - docker-compose build python-alpine + - docker-compose run python-alpine + + docker-python-3.6-alpine: + platform: linux + template: docker-tests/travis.linux.yml + params: + environment: + PYTHON_VERSION: 3.6 + commands: + - docker-compose build cpp-alpine + - docker-compose build python-alpine + - docker-compose run python-alpine + + ###################### Documentation building tests ######################### + + docker-docs: + platform: linux + template: docker-tests/travis.linux.yml + params: + environment: + PYTHON_VERSION: 3.6 + commands: + - docker-compose build cpp + - docker-compose build python + - docker-compose build docs + - docker-compose run docs + ############################## Linter tests ################################# docker-lint: @@ -168,14 +253,42 @@ tasks: ############################## Integration tests ############################ + docker-dask-integration: + platform: linux + template: docker-tests/travis.linux.yml + params: + environment: + PYTHON_VERSION: 3.6 + commands: + - docker-compose build cpp + - docker-compose build python + - docker-compose build dask-integration + - docker-compose run dask-integration + docker-hdfs-integration: platform: linux template: docker-tests/travis.linux.yml params: + environment: + PYTHON_VERSION: 3.6 commands: + - docker-compose build cpp + - docker-compose build python - docker-compose build hdfs-integration - docker-compose run hdfs-integration + docker-spark-integration: + platform: linux + template: docker-tests/travis.linux.yml + params: + environment: + PYTHON_VERSION: 3.6 + commands: + - docker-compose build cpp + - docker-compose build python + - docker-compose build spark-integration + - docker-compose run spark-integration + docker-pandas-master: platform: linux template: docker-tests/travis.linux.yml diff --git a/docker-compose.yml b/docker-compose.yml index 50e4dded6146f..b93fed74b18d1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -19,13 +19,31 @@ version: '3.5' -x-volumes: - &volumes +# TODO(kszucs): set arrow's mount to :ro mode, once all of the builds are +# passing without write access to the source directory. The following builds +# are contaminating the source directory: +# - docs +# - python-alpine (writes .egg directory) +# - rust (writes Cargo.lock) +# - java (without the rsync trick) + +x-ubuntu-volumes: + &ubuntu-volumes - .:/arrow:delegated - - ${ARROW_DOCKER_CACHE_DIR:-./docker_cache}:/build:delegated + - ubuntu-cache:/build:delegated -services: +x-alpine-volumes: + &alpine-volumes + - .:/arrow:delegated + - alpine-cache:/build:delegated +volumes: + ubuntu-cache: + alpine-cache: + maven-cache: + spark-cache: + +services: ######################### Language Containers ############################### c_glib: @@ -37,7 +55,7 @@ services: build: context: . 
dockerfile: c_glib/Dockerfile - volumes: *volumes + volumes: *ubuntu-volumes cpp: # Usage: @@ -50,7 +68,36 @@ services: dockerfile: cpp/Dockerfile environment: PARQUET_TEST_DATA: /arrow/cpp/submodules/parquet-testing/data - volumes: *volumes + volumes: *ubuntu-volumes + + cpp-cmake32: + # Usage: + # docker-compose build cpp-cmake32 + # docker-compose run cpp-cmake32 + image: arrow:cpp-cmake32 + shm_size: 2G + build: + context: . + dockerfile: cpp/Dockerfile + args: + EXTRA_CONDA_PKGS: cmake=3.2 + environment: + ARROW_ORC: "OFF" + PARQUET_TEST_DATA: /arrow/cpp/submodules/parquet-testing/data + volumes: *ubuntu-volumes + + cpp-alpine: + # Usage: + # docker-compose build cpp-alpine + # docker-compose run cpp-alpine + image: arrow:cpp-alpine + shm_size: 2G + build: + context: . + dockerfile: cpp/Dockerfile.alpine + environment: + PARQUET_TEST_DATA: /arrow/cpp/submodules/parquet-testing/data + volumes: *alpine-volumes go: # Usage: @@ -60,7 +107,7 @@ services: build: context: . dockerfile: go/Dockerfile - volumes: *volumes + volumes: *ubuntu-volumes java: # Usage: @@ -71,8 +118,8 @@ services: context: . dockerfile: java/Dockerfile volumes: - - .:/arrow:delegated - - $HOME/.m2:/root/.m2:delegated + - .:/arrow:ro # ensures that docker won't contaminate the host directory + - maven-cache:/root/.m2:delegated js: image: arrow:js @@ -93,7 +140,22 @@ services: dockerfile: python/Dockerfile args: PYTHON_VERSION: ${PYTHON_VERSION:-3.6} - volumes: *volumes + volumes: *ubuntu-volumes + + python-alpine: + # Usage: + # export PYTHON_VERSION=2.7|3.6 (minor version is ignored) + # docker-compose build cpp-alpine + # docker-compose build python-alpine + # docker-compose run python-alpine + image: arrow:python-${PYTHON_VERSION:-3.6}-alpine + shm_size: 2G + build: + context: . + dockerfile: python/Dockerfile.alpine + args: + PYTHON_VERSION: ${PYTHON_VERSION:-3.6} + volumes: *alpine-volumes rust: # Usage: @@ -103,7 +165,9 @@ services: build: context: . dockerfile: rust/Dockerfile - volumes: *volumes + environment: + PARQUET_TEST_DATA: /arrow/cpp/submodules/parquet-testing/data + volumes: *ubuntu-volumes r: # Usage: @@ -114,15 +178,18 @@ services: build: context: . dockerfile: r/Dockerfile - volumes: *volumes + volumes: *ubuntu-volumes ######################### Tools and Linters ################################# # TODO(kszucs): site - # TODO(kszucs): apidoc + # TODO(kszucs): {cpp,java,glib,js}-apidoc lint: # Usage: + # export PYTHON_VERSION=3.6 + # docker-compose build cpp + # docker-compose build python # docker-compose build lint # docker-compose run lint image: arrow:lint @@ -130,10 +197,13 @@ services: context: . dockerfile: dev/lint/Dockerfile command: arrow/dev/lint/run_linters.sh - volumes: *volumes + volumes: *ubuntu-volumes iwyu: # Usage: + # export PYTHON_VERSION=3.6 + # docker-compose build cpp + # docker-compose build python # docker-compose build lint # docker-compose run iwyu image: arrow:lint @@ -141,15 +211,29 @@ services: CC: clang CXX: clang++ command: arrow/dev/lint/run_iwyu.sh - volumes: *volumes + volumes: *ubuntu-volumes clang-format: # Usage: + # docker-compose build cpp + # docker-compose build python # docker-compose build lint # docker-compose run clang-format image: arrow:lint command: arrow/dev/lint/run_clang_format.sh - volumes: *volumes + volumes: *ubuntu-volumes + + docs: + # Usage: + # docker-compose build cpp + # docker-compose build python + # docker-compose build docs + # docker-compose run docs + image: arrow:docs + build: + context: . 
+ dockerfile: docs/Dockerfile + volumes: *ubuntu-volumes ######################### Integration Tests ################################# @@ -159,6 +243,20 @@ services: # - "21050" # hostname: impala + pandas-master: + # Usage: + # export PYTHON_VERSION=3.6 + # docker-compose build cpp + # docker-compose build python + # docker-compose build --no-cache pandas-master + # docker-compose run pandas-master + image: arrow:pandas-master + build: + context: . + dockerfile: integration/pandas/Dockerfile + shm_size: 2G + volumes: *ubuntu-volumes + hdfs-namenode: image: gelog/hadoop shm_size: 2G @@ -168,7 +266,17 @@ services: command: hdfs namenode hostname: hdfs-namenode - hdfs-datanode: + hdfs-datanode-1: + image: gelog/hadoop + command: hdfs datanode + ports: + # The host port is randomly assigned by Docker, to allow scaling + # to multiple DataNodes on the same host + - "50075" + links: + - hdfs-namenode:hdfs-namenode + + hdfs-datanode-2: image: gelog/hadoop command: hdfs datanode ports: @@ -185,9 +293,11 @@ services: # docker-compose build python # docker-compose build hdfs-integration # docker-compose run hdfs-integration + image: arrow:hdfs-${HDFS_VERSION:-2.6.5} links: - hdfs-namenode:hdfs-namenode - - hdfs-datanode:hdfs-datanode + - hdfs-datanode-1:hdfs-datanode-1 + - hdfs-datanode-2:hdfs-datanode-2 environment: - ARROW_HDFS_TEST_HOST=hdfs-namenode - ARROW_HDFS_TEST_PORT=9000 @@ -195,22 +305,41 @@ services: build: context: . dockerfile: integration/hdfs/Dockerfile + args: + HDFS_VERSION: ${HDFS_VERSION:-2.6.5} + volumes: *ubuntu-volumes - pandas-master: + # TODO(kszucs): pass dask version explicitly as a build argument + dask-integration: # Usage: # export PYTHON_VERSION=3.6 # docker-compose build cpp # docker-compose build python - # docker-compose build --no-cache pandas-master - # docker-compose run pandas-master - image: arrow:pandas-master + # docker-compose build dask-integration + # docker-compose run dask-integration + image: arrow:dask-integration build: context: . - dockerfile: integration/pandas/Dockerfile - shm_size: 2G - volumes: *volumes + dockerfile: integration/dask/Dockerfile + volumes: *ubuntu-volumes + spark-integration: + # Usage: + # export PYTHON_VERSION=3.6 + # docker-compose build cpp + # docker-compose build python + # docker-compose build spark-integration + # docker-compose run spark-integration + image: arrow:spark-${SPARK_VERSION:-2.4.0} + environment: + - SPARK_VERSION=${SPARK_VERSION:-2.4.0} + build: + context: . + dockerfile: integration/spark/Dockerfile + volumes: + - .:/arrow:ro # ensures that docker won't contaminate the host directory + - ubuntu-cache:/build:delegated + - maven-cache:/root/.m2:delegated + - spark-cache:/spark:delegated - # TODO(kszucs): dask-integration # TODO(kszucs): hive-integration - # TODO(kszucs): spark-integration diff --git a/python/doc/.gitignore b/docs/.gitignore similarity index 97% rename from python/doc/.gitignore rename to docs/.gitignore index 3bee39fa36fe4..d2e9f6ccc8f79 100644 --- a/python/doc/.gitignore +++ b/docs/.gitignore @@ -16,4 +16,4 @@ # under the License. _build -source/generated \ No newline at end of file +source/python/generated diff --git a/docs/Dockerfile b/docs/Dockerfile new file mode 100644 index 0000000000000..d9441c2441868 --- /dev/null +++ b/docs/Dockerfile @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +FROM arrow:python-3.6 + +ADD ci/conda_env_sphinx.yml /arrow/ci/ +RUN conda install --file arrow/ci/conda_env_sphinx.yml && \ + conda clean --all + +CMD arrow/ci/docker_build_cpp.sh && \ + arrow/ci/docker_build_python.sh && \ + arrow/ci/docker_build_sphinx.sh diff --git a/python/doc/Makefile b/docs/Makefile similarity index 100% rename from python/doc/Makefile rename to docs/Makefile diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000000000..4430d65cebb11 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,30 @@ + + +# Apache Arrow Documentation + +This directory contains source files for building the main project +documentation. This includes the [Arrow columnar format specification][2]. + +Instructions for building the documentation site are found in +[docs/source/building.rst][1]. The build depends on the API +documentation for some of the project subcomponents. + +[1]: https://github.com/apache/arrow/blob/master/docs/source/building.rst +[2]: https://github.com/apache/arrow/tree/master/docs/source/format \ No newline at end of file diff --git a/python/doc/environment.yml b/docs/environment.yml similarity index 100% rename from python/doc/environment.yml rename to docs/environment.yml diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000000000..36f2086c20b3f --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,52 @@ +@rem Licensed to the Apache Software Foundation (ASF) under one +@rem or more contributor license agreements. See the NOTICE file +@rem distributed with this work for additional information +@rem regarding copyright ownership. The ASF licenses this file +@rem to you under the Apache License, Version 2.0 (the +@rem "License"); you may not use this file except in compliance +@rem with the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, +@rem software distributed under the License is distributed on an +@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +@rem KIND, either express or implied. See the License for the +@rem specific language governing permissions and limitations +@rem under the License. + +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+  echo.If you don't have Sphinx installed, grab it from
+  echo.http://sphinx-doc.org/
+  exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
+
+:end
+popd
diff --git a/python/doc/requirements.txt b/docs/requirements.txt
similarity index 78%
rename from python/doc/requirements.txt
rename to docs/requirements.txt
index f3c3414a4be9a..77ca6574c5356 100644
--- a/python/doc/requirements.txt
+++ b/docs/requirements.txt
@@ -1,5 +1,5 @@
+breathe
 ipython
-matplotlib
 numpydoc
 sphinx
 sphinx_rtd_theme
diff --git a/python/doc/source/_static/stub b/docs/source/_static/stub
similarity index 100%
rename from python/doc/source/_static/stub
rename to docs/source/_static/stub
diff --git a/python/doc/source/_templates/layout.html b/docs/source/_templates/layout.html
similarity index 100%
rename from python/doc/source/_templates/layout.html
rename to docs/source/_templates/layout.html
diff --git a/docs/source/building.rst b/docs/source/building.rst
new file mode 100644
index 0000000000000..c6ff97424fcfb
--- /dev/null
+++ b/docs/source/building.rst
@@ -0,0 +1,90 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. _building-docs:
+
+Building the Documentation
+==========================
+
+Prerequisites
+-------------
+
+The documentation build process uses Doxygen and Sphinx along with a few
+extensions.
+
+If you're using Conda, the required software can be installed in a single line:
+
+.. code-block:: shell
+
+   conda install -c conda-forge --file=ci/conda_env_sphinx.yml
+
+Otherwise, you'll first need to install Doxygen yourself (for example from
+your distribution's official repositories, if using Linux). Then you can
+install the Python-based requirements with the following command:
+
+.. code-block:: shell
+
+   pip install -r docs/requirements.txt
+
+Building
+--------
+
+.. note::
+
+   If you are building the documentation on Windows, not all sections
+   may build properly.
+
+These two steps are mandatory and must be executed in order.
+
+#. Process the C++ API using Doxygen
+
+   .. code-block:: shell
+
+      pushd cpp/apidoc
+      doxygen
+      popd
+
+#. Build the complete documentation using Sphinx
+
+   .. code-block:: shell
+
+      pushd docs
+      make html
+      popd
+
+After these steps are completed, the documentation is rendered in HTML
+format in ``docs/_build/html``. In particular, you can point your browser
+at ``docs/_build/html/index.html`` to read the docs and review any changes
+you made.
+
+
+.. _building-docker:
+
+Building with Docker
+--------------------
+
+You can use Docker to build the documentation:
+
+.. code-block:: shell
+
+   docker-compose build cpp
+   docker-compose build python
+   docker-compose build docs
+   docker-compose run docs
+
+The final output is located under ``docs/_build/html``.
diff --git a/python/doc/source/conf.py b/docs/source/conf.py
similarity index 84%
rename from python/doc/source/conf.py
rename to docs/source/conf.py
index f8327902f218a..d525fa943138b 100644
--- a/python/doc/source/conf.py
+++ b/docs/source/conf.py
@@ -30,7 +30,7 @@
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
-import glob
+import pyarrow
 import os
 import sys
 
@@ -53,20 +53,25 @@
     'sphinx.ext.autodoc',
     'sphinx.ext.autosummary',
     'sphinx.ext.doctest',
+    'sphinx.ext.ifconfig',
     'sphinx.ext.mathjax',
     'sphinx.ext.viewcode',
     'sphinx.ext.napoleon',
     'IPython.sphinxext.ipython_directive',
-    'IPython.sphinxext.ipython_console_highlighting'
+    'IPython.sphinxext.ipython_console_highlighting',
+    'breathe'
 ]
 
 # Show members for classes in .. autosummary
-autodoc_default_flags = [
-    'members',
-    'undoc-members',
-    'show-inheritance',
-    'inherited-members'
-]
+autodoc_default_options = {
+    'members': None,
+    'undoc-members': None,
+    'show-inheritance': None,
+    'inherited-members': None
+}
+
+# Overridden conditionally below
+autodoc_mock_imports = []
 
 # ipython directive options
 ipython_mplbackend = ''
@@ -77,13 +82,16 @@
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
 
+breathe_projects = {"arrow_cpp": "../../cpp/apidoc/xml"}
+breathe_default_project = "arrow_cpp"
+
 # The suffix(es) of source filenames.
 # You can specify multiple suffix as a list of string:
 #
-# source_suffix = ['.rst', '.md']
-source_suffix = '.rst'
-autosummary_generate = glob.glob("*.rst")
+source_suffix = ['.rst']
+
+autosummary_generate = True
 
 # The encoding of source files.
 #
@@ -93,7 +101,7 @@
 master_doc = 'index'
 
 # General information about the project.
-project = u'pyarrow'
+project = u'Apache Arrow'
 copyright = u'2016-2018 Apache Software Foundation'
 author = u'Apache Software Foundation'
 
@@ -102,9 +110,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = u''
+version = pyarrow.__version__
 # The full version, including alpha/beta/rc tags.
-release = u''
+release = pyarrow.__version__
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
@@ -180,7 +188,7 @@
 # The name for this set of Sphinx documents.
 # " v documentation" by default.
 #
-# html_title = u'pyarrow v0.1.0'
+html_title = u'Apache Arrow v{}'.format(version)
 
 # A shorter title for the navigation bar. Default is the same as html_title.
 #
@@ -280,7 +288,7 @@
 # html_search_scorer = 'scorer.js'
 
 # Output file base name for HTML help builder.
-htmlhelp_basename = 'pyarrowdoc'
+htmlhelp_basename = 'arrowdoc'
 
 # -- Options for LaTeX output ---------------------------------------------
 
@@ -306,7 +314,7 @@
 # (source start file, target name, title,
 #  author, documentclass [howto, manual, or own class]).
 latex_documents = [
-    (master_doc, 'pyarrow.tex', u'pyarrow Documentation',
+    (master_doc, 'arrow.tex', u'Apache Arrow Documentation',
     u'Apache Arrow Team', 'manual'),
 ]
 
@@ -348,7 +356,7 @@
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
man_pages = [ - (master_doc, 'pyarrow', u'pyarrow Documentation', + (master_doc, 'arrow', u'Apache Arrow Documentation', [author], 1) ] @@ -363,8 +371,8 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'pyarrow', u'pyarrow Documentation', - author, 'pyarrow', 'One line description of project.', + (master_doc, 'arrow', u'Apache Arrow Documentation', + author, 'Apache Arrow', 'One line description of project.', 'Miscellaneous'), ] @@ -383,3 +391,32 @@ # If true, do not generate a @detailmenu in the "Top" node's menu. # # texinfo_no_detailmenu = False + + +# -- Customization -------------------------------------------------------- + +# Conditional API doc generation + +# Sphinx has two features for conditional inclusion: +# - The "only" directive +# https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#including-content-based-on-tags +# - The "ifconfig" extension +# https://www.sphinx-doc.org/en/master/usage/extensions/ifconfig.html +# +# Both have issues, but "ifconfig" seems to work in this setting. + +try: + import pyarrow.cuda + cuda_enabled = True +except ImportError: + cuda_enabled = False + # Mock pyarrow.cuda to avoid autodoc warnings. + # XXX I can't get autodoc_mock_imports to work, so mock manually instead + # (https://github.com/sphinx-doc/sphinx/issues/2174#issuecomment-453177550) + from unittest import mock + pyarrow.cuda = sys.modules['pyarrow.cuda'] = mock.Mock() + +def setup(app): + # Use a config value to indicate whether CUDA API docs can be generated. + # This will also rebuild appropriately when the value changes. + app.add_config_value('cuda_enabled', cuda_enabled, 'env') diff --git a/docs/source/cpp/api.rst b/docs/source/cpp/api.rst new file mode 100644 index 0000000000000..522609e85aacd --- /dev/null +++ b/docs/source/cpp/api.rst @@ -0,0 +1,32 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +************* +API Reference +************* + +.. toctree:: + :maxdepth: 3 + + api/support + api/memory + api/datatype + api/array + api/builder + api/table + api/utilities + api/cuda diff --git a/docs/source/cpp/api/array.rst b/docs/source/cpp/api/array.rst new file mode 100644 index 0000000000000..bb981d1a0477d --- /dev/null +++ b/docs/source/cpp/api/array.rst @@ -0,0 +1,92 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. 
http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +====== +Arrays +====== + +.. doxygenclass:: arrow::Array + :project: arrow_cpp + :members: + +Concrete array subclasses +========================= + +.. doxygenclass:: arrow::DictionaryArray + :project: arrow_cpp + :members: + +Non-nested +---------- + +.. doxygenclass:: arrow::FlatArray + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::NullArray + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::BinaryArray + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::StringArray + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::PrimitiveArray + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::BooleanArray + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::FixedSizeBinaryArray + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::Decimal128Array + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::NumericArray + :project: arrow_cpp + :members: + +Nested +------ + +.. doxygenclass:: arrow::UnionArray + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::ListArray + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::StructArray + :project: arrow_cpp + :members: + +Chunked Arrays +============== + +.. doxygenclass:: arrow::ChunkedArray + :project: arrow_cpp + :members: diff --git a/docs/source/cpp/api/builder.rst b/docs/source/cpp/api/builder.rst new file mode 100644 index 0000000000000..9e6540aa557fb --- /dev/null +++ b/docs/source/cpp/api/builder.rst @@ -0,0 +1,56 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +============== +Array Builders +============== + +.. doxygenclass:: arrow::ArrayBuilder + :members: + +Concrete builder subclasses +=========================== + +.. doxygenclass:: arrow::NullBuilder + :members: + +.. doxygenclass:: arrow::BooleanBuilder + :members: + +.. doxygenclass:: arrow::NumericBuilder + :members: + +.. doxygenclass:: arrow::BinaryBuilder + :members: + +.. doxygenclass:: arrow::StringBuilder + :members: + +.. doxygenclass:: arrow::FixedSizeBinaryBuilder + :members: + +.. doxygenclass:: arrow::Decimal128Builder + :members: + +.. doxygenclass:: arrow::ListBuilder + :members: + +.. doxygenclass:: arrow::StructBuilder + :members: + +.. 
doxygenclass:: arrow::DictionaryBuilder + :members: diff --git a/docs/source/cpp/api/cuda.rst b/docs/source/cpp/api/cuda.rst new file mode 100644 index 0000000000000..e0b90e3a51357 --- /dev/null +++ b/docs/source/cpp/api/cuda.rst @@ -0,0 +1,69 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +============ +CUDA support +============ + +CUDA Contexts +============= + +.. doxygenclass:: arrow::cuda::CudaDeviceManager + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::cuda::CudaContext + :project: arrow_cpp + :members: + +Device and Host Buffers +======================= + +.. doxygenclass:: arrow::cuda::CudaBuffer + :project: arrow_cpp + :members: + +.. doxygenfunction:: arrow::cuda::AllocateCudaHostBuffer + :project: arrow_cpp + +.. doxygenclass:: arrow::cuda::CudaHostBuffer + :project: arrow_cpp + :members: + +Device Memory Input / Output +============================ + +.. doxygenclass:: arrow::cuda::CudaBufferReader + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::cuda::CudaBufferWriter + :project: arrow_cpp + :members: + +CUDA IPC +======== + +.. doxygenclass:: arrow::cuda::CudaIpcMemHandle + :project: arrow_cpp + :members: + +.. doxygenfunction:: arrow::cuda::SerializeRecordBatch + :project: arrow_cpp + +.. doxygenfunction:: arrow::cuda::ReadRecordBatch + :project: arrow_cpp diff --git a/docs/source/cpp/api/datatype.rst b/docs/source/cpp/api/datatype.rst new file mode 100644 index 0000000000000..adfc6e4171e66 --- /dev/null +++ b/docs/source/cpp/api/datatype.rst @@ -0,0 +1,148 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +========== +Data Types +========== + +.. doxygenenum:: arrow::Type::type + +.. doxygenclass:: arrow::DataType + :members: + +.. _api-type-factories: + +Factory functions +================= + +These functions are recommended for creating data types. They may return +new objects or existing singletons, depending on the type requested. + +.. 
doxygengroup:: type-factories + :project: arrow_cpp + :content-only: + +Concrete type subclasses +======================== + +Primitive +--------- + +.. doxygenclass:: arrow::NullType + :members: + +.. doxygenclass:: arrow::BooleanType + :members: + +.. doxygenclass:: arrow::Int8Type + :members: + +.. doxygenclass:: arrow::Int16Type + :members: + +.. doxygenclass:: arrow::Int32Type + :members: + +.. doxygenclass:: arrow::Int64Type + :members: + +.. doxygenclass:: arrow::UInt8Type + :members: + +.. doxygenclass:: arrow::UInt16Type + :members: + +.. doxygenclass:: arrow::UInt32Type + :members: + +.. doxygenclass:: arrow::UInt64Type + :members: + +.. doxygenclass:: arrow::HalfFloatType + :members: + +.. doxygenclass:: arrow::FloatType + :members: + +.. doxygenclass:: arrow::DoubleType + :members: + +Time-related +------------ + +.. doxygenenum:: arrow::TimeUnit::type + +.. doxygenclass:: arrow::Date32Type + :members: + +.. doxygenclass:: arrow::Date64Type + :members: + +.. doxygenclass:: arrow::Time32Type + :members: + +.. doxygenclass:: arrow::Time64Type + :members: + +.. doxygenclass:: arrow::TimestampType + :members: + +Binary-like +----------- + +.. doxygenclass:: arrow::BinaryType + :members: + +.. doxygenclass:: arrow::StringType + :members: + +.. doxygenclass:: arrow::FixedSizeBinaryType + :members: + +.. doxygenclass:: arrow::Decimal128Type + :members: + +Nested +------ + +.. doxygenclass:: arrow::ListType + :members: + +.. doxygenclass:: arrow::StructType + :members: + +.. doxygenclass:: arrow::UnionType + :members: + +Dictionary-encoded +------------------ + +.. doxygenclass:: arrow::DictionaryType + :members: + +Fields and Schemas +================== + +.. doxygengroup:: schema-factories + :project: arrow_cpp + :content-only: + +.. doxygenclass:: arrow::Field + :members: + +.. doxygenclass:: arrow::Schema + :members: diff --git a/docs/source/cpp/api/memory.rst b/docs/source/cpp/api/memory.rst new file mode 100644 index 0000000000000..c921229e6cb17 --- /dev/null +++ b/docs/source/cpp/api/memory.rst @@ -0,0 +1,90 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Memory (management) +=================== + +Buffers +------- + +.. doxygenclass:: arrow::Buffer + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::MutableBuffer + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::ResizableBuffer + :project: arrow_cpp + :members: + +Memory Pools +------------ + +.. doxygenfunction:: arrow::default_memory_pool + :project: arrow_cpp + +.. doxygenclass:: arrow::MemoryPool + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::LoggingMemoryPool + :project: arrow_cpp + :members: + +.. 
doxygenclass:: arrow::ProxyMemoryPool + :project: arrow_cpp + :members: + +Allocation Functions +-------------------- + +These functions allocate a buffer from a particular memory pool. + +.. doxygengroup:: buffer-allocation-functions + :project: arrow_cpp + :content-only: + +Slicing +------- + +.. doxygengroup:: buffer-slicing-functions + :project: arrow_cpp + :content-only: + +Buffer Builders +--------------- + +.. doxygenclass:: arrow::BufferBuilder + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::TypedBufferBuilder + :project: arrow_cpp + :members: + +STL Integration +--------------- + +.. doxygenclass:: arrow::stl_allocator + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::STLMemoryPool + :project: arrow_cpp + :members: diff --git a/docs/source/cpp/api/support.rst b/docs/source/cpp/api/support.rst new file mode 100644 index 0000000000000..b165a9973b4c1 --- /dev/null +++ b/docs/source/cpp/api/support.rst @@ -0,0 +1,29 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Programming Support +=================== + +Error return and reporting +-------------------------- + +.. doxygenclass:: arrow::Status + :project: arrow_cpp + :members: + +.. doxygendefine:: ARROW_RETURN_NOT_OK + diff --git a/docs/source/cpp/api/table.rst b/docs/source/cpp/api/table.rst new file mode 100644 index 0000000000000..e8b4f8e066e30 --- /dev/null +++ b/docs/source/cpp/api/table.rst @@ -0,0 +1,52 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +======================== +Two-dimensional Datasets +======================== + +Columns +======= + +.. doxygenclass:: arrow::Column + :project: arrow_cpp + :members: + +Tables +====== + +.. doxygenclass:: arrow::Table + :project: arrow_cpp + :members: + +.. doxygenfunction:: arrow::ConcatenateTables + :project: arrow_cpp + +Record Batches +============== + +.. doxygenclass:: arrow::RecordBatch + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::RecordBatchReader + :project: arrow_cpp + :members: + +.. 
doxygenclass:: arrow::TableBatchReader
+   :project: arrow_cpp
+   :members:
diff --git a/docs/source/cpp/api/utilities.rst b/docs/source/cpp/api/utilities.rst
new file mode 100644
index 0000000000000..1c18a20425c24
--- /dev/null
+++ b/docs/source/cpp/api/utilities.rst
@@ -0,0 +1,27 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+=========
+Utilities
+=========
+
+Decimal Numbers
+===============
+
+.. doxygenclass:: arrow::Decimal128
+   :project: arrow_cpp
+   :members:
diff --git a/docs/source/cpp/arrays.rst b/docs/source/cpp/arrays.rst
new file mode 100644
index 0000000000000..0c5272d2aed5e
--- /dev/null
+++ b/docs/source/cpp/arrays.rst
@@ -0,0 +1,211 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. default-domain:: cpp
+.. highlight:: cpp
+
+======
+Arrays
+======
+
+The central type in Arrow is the class :class:`arrow::Array`. An array
+represents a known-length sequence of values all having the same type.
+Internally, those values are represented by one or several buffers, the
+number and meaning of which depend on the array's data type, as documented
+in :doc:`the Arrow data layout specification <../format/Layout>`.
+
+Those buffers consist of the value data itself and an optional bitmap buffer
+that indicates which array entries are null values. The bitmap buffer
+can be entirely omitted if the array is known to have zero null values.
+
+There are concrete subclasses of :class:`arrow::Array` for each data type
+that help you access individual values of the array.
+
+Building an array
+=================
+
+As Arrow objects are immutable, there are classes provided that help you
+build these objects incrementally from third-party data. These classes
+are organized in a hierarchy around the :class:`arrow::ArrayBuilder` base class,
+with concrete subclasses tailored for each particular data type.
+
+For example, to build an array of ``int64_t`` elements, we can use the
+:class:`arrow::Int64Builder` class.
+In the following example, we build an array of the range 1 to 8 where the
+element that should hold the value 4 is nulled::
+
+   arrow::Int64Builder builder;
+   builder.Append(1);
+   builder.Append(2);
+   builder.Append(3);
+   builder.AppendNull();
+   builder.Append(5);
+   builder.Append(6);
+   builder.Append(7);
+   builder.Append(8);
+
+   std::shared_ptr<arrow::Array> array;
+   arrow::Status st = builder.Finish(&array);
+   if (!st.ok()) {
+      // ... do something on array building failure
+   }
+
+The resulting Array (which can be cast to the concrete :class:`arrow::Int64Array`
+subclass if you want to access its values) then consists of two
+:class:`arrow::Buffer`\s.
+The first buffer holds the null bitmap, which consists here of a single byte
+with the bits ``1|1|1|1|0|1|1|1``. As we use
+`least-significant bit (LSB) numbering`_, this indicates that the fourth
+entry in the array is null. The second buffer is simply an ``int64_t`` array
+containing all the above values.
+As the fourth entry is null, the value at that position in the buffer is
+undefined.
+
+Here is how you could access the concrete array's contents::
+
+   // Cast the Array to its actual type to access its data
+   auto int64_array = std::static_pointer_cast<arrow::Int64Array>(array);
+
+   // Get the pointer to the null bitmap.
+   const uint8_t* null_bitmap = int64_array->null_bitmap_data();
+
+   // Get the pointer to the actual data
+   const int64_t* data = int64_array->raw_values();
+
+   // Alternatively, given an array index, query its null bit and value directly
+   int64_t index = 2;
+   if (!int64_array->IsNull(index)) {
+      int64_t value = int64_array->Value(index);
+   }
+
+.. note::
+   :class:`arrow::Int64Array` (respectively :class:`arrow::Int64Builder`) is
+   just a ``typedef``, provided for convenience, of
+   ``arrow::NumericArray<Int64Type>`` (respectively
+   ``arrow::NumericBuilder<Int64Type>``).
+
+.. _least-significant bit (LSB) numbering: https://en.wikipedia.org/wiki/Bit_numbering
+
+Performance
+-----------
+
+While it is possible to build an array value-by-value as in the example above,
+to attain the highest performance it is recommended to use the bulk appending
+methods (usually named ``AppendValues``) in the concrete :class:`arrow::ArrayBuilder`
+subclasses.
+
+If you know the number of elements in advance, it is also recommended to
+presize the working area by calling the :func:`~arrow::ArrayBuilder::Resize`
+or :func:`~arrow::ArrayBuilder::Reserve` methods.
+
+Here is how one could rewrite the above example to take advantage of those
+APIs::
+
+   arrow::Int64Builder builder;
+   // Make place for 8 values in total
+   builder.Resize(8);
+   // Bulk append the given values (with a null in 4th place as indicated by
+   // the validity vector)
+   std::vector<bool> validity = {true, true, true, false, true, true, true, true};
+   std::vector<int64_t> values = {1, 2, 3, 0, 5, 6, 7, 8};
+   builder.AppendValues(values, validity);
+
+   std::shared_ptr<arrow::Array> array;
+   arrow::Status st = builder.Finish(&array);
+
+If you still must append values one by one, some concrete builder subclasses
+have methods marked "Unsafe" that assume the working area has been correctly
+presized, and offer higher performance in exchange::
+
+   arrow::Int64Builder builder;
+   // Make place for 8 values in total
+   builder.Resize(8);
+   builder.UnsafeAppend(1);
+   builder.UnsafeAppend(2);
+   builder.UnsafeAppend(3);
+   builder.UnsafeAppendNull();
+   builder.UnsafeAppend(5);
+   builder.UnsafeAppend(6);
+   builder.UnsafeAppend(7);
+   builder.UnsafeAppend(8);
+
+   std::shared_ptr<arrow::Array> array;
+   arrow::Status st = builder.Finish(&array);
+
+
+Size Limitations and Recommendations
+====================================
+
+Some array types are structurally limited to 32-bit sizes. This is the case
+for list arrays (which can hold up to 2^31 elements), string arrays and binary
+arrays (which can hold up to 2GB of binary data), at least. Some other array
+types can hold up to 2^63 elements in the C++ implementation, but other Arrow
+implementations can have a 32-bit size limitation for those array types as
+well.
+
+For these reasons, it is recommended that huge data be chunked in subsets of
+more reasonable size.
+
+Chunked Arrays
+==============
+
+A :class:`arrow::ChunkedArray` is, like an array, a logical sequence of values;
+but unlike a simple array, a chunked array does not require the entire sequence
+to be physically contiguous in memory. Also, the constituents of a chunked array
+need not have the same size, but they must all have the same data type.
+
+A chunked array is constructed by aggregating any number of arrays. Here we'll
+build a chunked array with the same logical values as in the example above,
+but in two separate chunks::
+
+   std::vector<std::shared_ptr<arrow::Array>> chunks;
+   std::shared_ptr<arrow::Array> array;
+
+   // Build first chunk
+   arrow::Int64Builder builder;
+   builder.Append(1);
+   builder.Append(2);
+   builder.Append(3);
+   if (!builder.Finish(&array).ok()) {
+      // ... do something on array building failure
+   }
+   chunks.push_back(std::move(array));
+
+   // Build second chunk
+   builder.Reset();
+   builder.AppendNull();
+   builder.Append(5);
+   builder.Append(6);
+   builder.Append(7);
+   builder.Append(8);
+   if (!builder.Finish(&array).ok()) {
+      // ... do something on array building failure
+   }
+   chunks.push_back(std::move(array));
+
+   auto chunked_array = std::make_shared<arrow::ChunkedArray>(std::move(chunks));
+
+   assert(chunked_array->num_chunks() == 2);
+   // Logical length in number of values
+   assert(chunked_array->length() == 8);
+   assert(chunked_array->null_count() == 1);
+
+Slicing
+=======
+
+Like for physical memory buffers, it is possible to make zero-copy slices
+of arrays and chunked arrays, to obtain an array or chunked array referring
+to some logical subsequence of the data. This is done by calling the
+:func:`arrow::Array::Slice` and :func:`arrow::ChunkedArray::Slice` methods,
+respectively.
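+
+For instance, a zero-copy view into the arrays built above can be obtained as
+follows. This is a minimal sketch: ``slice`` and ``chunk_slice`` are names of
+our choosing, and ``array`` and ``chunked_array`` refer to the objects from
+the previous examples::
+
+   // Zero-copy view of 3 values starting at index 2; the slice shares
+   // the underlying buffers with the original array
+   std::shared_ptr<arrow::Array> slice = array->Slice(2, 3);
+
+   // Chunked arrays can be sliced across chunk boundaries as well
+   std::shared_ptr<arrow::ChunkedArray> chunk_slice = chunked_array->Slice(2, 5);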
+
diff --git a/docs/source/cpp/conventions.rst b/docs/source/cpp/conventions.rst
new file mode 100644
index 0000000000000..b0424358901b4
--- /dev/null
+++ b/docs/source/cpp/conventions.rst
@@ -0,0 +1,91 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. default-domain:: cpp
+.. highlight:: cpp
+
+Conventions
+===========
+
+The Arrow C++ API follows a few simple guidelines. As with many rules,
+there may be exceptions.
+
+Language version
+----------------
+
+Arrow is C++11-compatible. A few backports are used for newer functionality,
+for example the :class:`std::string_view` class.
+
+Namespacing
+-----------
+
+All the Arrow API (except macros) is namespaced inside an ``arrow`` namespace,
+and nested namespaces thereof.
+
+Safe pointers
+-------------
+
+Arrow objects are usually passed and stored using safe pointers -- most of
+the time :class:`std::shared_ptr` but sometimes also :class:`std::unique_ptr`.
+
+Immutability
+------------
+
+Many Arrow objects are immutable: once constructed, their logical properties
+cannot change anymore. This makes it possible to use them in multi-threaded
+scenarios without requiring tedious and error-prone synchronization.
+
+There are obvious exceptions to this, such as IO objects or mutable data buffers.
+
+Error reporting
+---------------
+
+Most APIs indicate a successful or erroneous outcome by returning a
+:class:`arrow::Status` instance. Arrow doesn't throw exceptions of its
+own, but third-party exceptions might propagate through, especially
+:class:`std::bad_alloc` (but Arrow doesn't use the standard allocators for
+large data).
+
+As a consequence, the result value of a function is generally passed as an
+out-pointer parameter, rather than as a function return value. (However,
+functions which always succeed deterministically may eschew this convention
+and return their result directly.)
+
+Here is an example of checking the outcome of an operation::
+
+   const int64_t buffer_size = 4096;
+   std::shared_ptr<arrow::Buffer> buffer;
+
+   auto status = arrow::AllocateBuffer(buffer_size, &buffer);
+   if (!status.ok()) {
+      // ... handle error
+   }
+
+If the caller function itself returns a :class:`arrow::Status` and wants
+to propagate any non-successful outcomes, a convenience macro
+:cpp:func:`ARROW_RETURN_NOT_OK` is available::
+
+   arrow::Status DoSomething() {
+      const int64_t buffer_size = 4096;
+      std::shared_ptr<arrow::Buffer> buffer;
+      ARROW_RETURN_NOT_OK(arrow::AllocateBuffer(buffer_size, &buffer));
+      // ... allocation successful, do something with buffer below
+
+      // return success at the end
+      return Status::OK();
+   }
diff --git a/docs/source/cpp/datatypes.rst b/docs/source/cpp/datatypes.rst
new file mode 100644
index 0000000000000..117c05b8755e7
--- /dev/null
+++ b/docs/source/cpp/datatypes.rst
@@ -0,0 +1,65 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. default-domain:: cpp
+.. highlight:: cpp
+
+Data Types
+==========
+
+Data types govern how physical data is interpreted. Their :doc:`specification
+<../format/Layout>` allows binary interoperability between different Arrow
+implementations, including from different programming languages and runtimes
+(for example it is possible to access the same data, without copying, from
+both Python and Java using the :py:mod:`pyarrow.jvm` bridge module).
+
+Information about a data type in C++ can be represented in three ways:
+
+1. Using a :class:`arrow::DataType` instance (e.g. as a function argument)
+2. Using a :class:`arrow::DataType` concrete subclass (e.g. as a template
+   parameter)
+3. Using a :type:`arrow::Type::type` enum value (e.g. as the condition of
+   a switch statement)
+
+The first form (using a :class:`arrow::DataType` instance) is the most idiomatic
+and flexible. Runtime-parametric types can only be fully represented with
+a DataType instance. For example, a :class:`arrow::TimestampType` needs to be
+constructed at runtime with a :type:`arrow::TimeUnit::type` parameter; a
+:class:`arrow::Decimal128Type` with *scale* and *precision* parameters;
+a :class:`arrow::ListType` with a full child type (itself a
+:class:`arrow::DataType` instance).
+
+The two other forms can be used where performance is critical, in order to
+avoid paying the price of dynamic typing and polymorphism. However, some
+amount of runtime switching can still be required for parametric types.
+It is not possible to reify all possible types at compile time, since Arrow
+data types allow arbitrary nesting.
+
+Creating data types
+-------------------
+
+To instantiate data types, it is recommended to call the provided
+:ref:`factory functions <api-type-factories>`::
+
+   std::shared_ptr<arrow::DataType> type;
+
+   // A 16-bit integer type
+   type = arrow::int16();
+   // A 64-bit timestamp type (with microsecond granularity)
+   type = arrow::timestamp(arrow::TimeUnit::MICRO);
+   // A list type of single-precision floating-point values
+   type = arrow::list(arrow::float32());
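+
+Conversely, when a type is only known at runtime, code can dispatch on the
+:type:`arrow::Type::type` enum returned by ``DataType::id()``. A minimal
+sketch of the third form described above (the ``DescribeType`` helper is
+hypothetical, not part of the Arrow API)::
+
+   // Dispatch on the type's enum id rather than on its dynamic class
+   std::string DescribeType(const arrow::DataType& type) {
+      switch (type.id()) {
+         case arrow::Type::INT16:
+            return "16-bit integer";
+         case arrow::Type::TIMESTAMP:
+            return "timestamp";
+         case arrow::Type::LIST:
+            return "list";
+         default:
+            return "something else";
+      }
+   }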
diff --git a/docs/source/cpp/examples.rst b/docs/source/cpp/examples.rst
new file mode 100644
index 0000000000000..5f4372fbba2f2
--- /dev/null
+++ b/docs/source/cpp/examples.rst
@@ -0,0 +1,30 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. default-domain:: cpp
+.. highlight:: cpp
+
+Examples
+========
+
+Row to columnar conversion
+--------------------------
+
+The following example converts an array of structs to a :class:`arrow::Table`
+instance, and then converts it back to the original array of structs.
+
+.. literalinclude:: ../../../cpp/examples/arrow/row-wise-conversion-example.cc
diff --git a/docs/source/cpp/getting_started.rst b/docs/source/cpp/getting_started.rst
new file mode 100644
index 0000000000000..7c55b76912d1b
--- /dev/null
+++ b/docs/source/cpp/getting_started.rst
@@ -0,0 +1,31 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. default-domain:: cpp
+.. highlight:: cpp
+
+Getting Started
+===============
+
+.. toctree::
+
+   overview
+   conventions
+   memory
+   arrays
+   datatypes
+   tables
diff --git a/docs/source/cpp/index.rst b/docs/source/cpp/index.rst
new file mode 100644
index 0000000000000..1d70e6acbf0ce
--- /dev/null
+++ b/docs/source/cpp/index.rst
@@ -0,0 +1,32 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+C++ Implementation
+==================
+
+.. toctree::
+   :maxdepth: 2
+
+   getting_started
+   examples
+   api
+
+.. TODO add "topics" chapter
+.. - nested arrays
+.. - dictionary encoding
+
+.. TODO add "building" or "development" chapter
diff --git a/docs/source/cpp/memory.rst b/docs/source/cpp/memory.rst
new file mode 100644
index 0000000000000..23b4725e4b971
--- /dev/null
+++ b/docs/source/cpp/memory.rst
@@ -0,0 +1,127 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. default-domain:: cpp
+.. highlight:: cpp
+
+=================
+Memory Management
+=================
+
+Buffers
+=======
+
+To avoid passing around raw data pointers with varying and non-obvious
+lifetime rules, Arrow provides a generic abstraction called :class:`arrow::Buffer`.
+A Buffer encapsulates a pointer and data size, and generally also ties its
+lifetime to that of an underlying provider (in other words, a Buffer should
+*always* point to valid memory until its destruction). Buffers are untyped:
+they simply denote a physical memory area regardless of its intended meaning
+or interpretation.
+
+Buffers may be allocated by Arrow itself, or by third-party routines.
+For example, it is possible to pass the data of a Python bytestring as an Arrow
+buffer, keeping the Python object alive as necessary.
+
+In addition, buffers come in various flavours: mutable or not, resizable or
+not. Generally, you will hold a mutable buffer when building up a piece
+of data, then it will be frozen as an immutable container such as an
+:doc:`array <arrays>`.
+
+.. note::
+   Some buffers may point to non-CPU memory, such as GPU-backed memory
+   provided by a CUDA context. If you're writing a GPU-aware application,
+   you will need to be careful not to interpret a GPU memory pointer as
+   a CPU-reachable pointer, or vice-versa.
+
+Accessing Buffer Memory
+-----------------------
+
+Buffers provide fast access to the underlying memory using the
+:func:`~arrow::Buffer::size` and :func:`~arrow::Buffer::data` accessors
+(or :func:`~arrow::Buffer::mutable_data` for writable access to a mutable
+buffer).
+
+Slicing
+-------
+
+It is possible to make zero-copy slices of buffers, to obtain a buffer
+referring to some contiguous subset of the underlying data. This is done
+by calling the :func:`arrow::SliceBuffer` and :func:`arrow::SliceMutableBuffer`
+functions.
+
+Allocating a Buffer
+-------------------
+
+You can allocate a buffer yourself by calling one of the
+:func:`arrow::AllocateBuffer` or :func:`arrow::AllocateResizableBuffer`
+overloads::
+
+   std::shared_ptr<arrow::Buffer> buffer;
+
+   if (!arrow::AllocateBuffer(4096, &buffer).ok()) {
+      // ... handle allocation error
+   }
+   uint8_t* buffer_data = buffer->mutable_data();
+   memcpy(buffer_data, "hello world", 11);
+
+Allocating a buffer this way ensures it is 64-byte aligned and padded
+as recommended by the :doc:`Arrow memory specification <../format/Layout>`.
+
+Building a Buffer
+-----------------
+
+You can also allocate *and* build a Buffer incrementally, using the
+:class:`arrow::BufferBuilder` API::
+
+   arrow::BufferBuilder builder;
+   builder.Resize(11);
+   builder.Append("hello ", 6);
+   builder.Append("world", 5);
+
+   std::shared_ptr<arrow::Buffer> buffer;
+   if (!builder.Finish(&buffer).ok()) {
+      // ... handle buffer allocation error
+   }
+
+Memory Pools
+============
+
+When allocating a Buffer using the Arrow C++ API, the buffer's underlying
+memory is allocated by a :class:`arrow::MemoryPool` instance. Usually this
+will be the process-wide *default memory pool*, but many Arrow APIs allow
+you to pass another MemoryPool instance for their internal allocations.
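+
+For example, the pool-taking overload of :func:`arrow::AllocateBuffer` lets
+you allocate from an explicitly chosen pool. A minimal sketch (reusing the
+default pool here, though any :class:`arrow::MemoryPool` implementation could
+be substituted)::
+
+   arrow::MemoryPool* pool = arrow::default_memory_pool();
+   std::shared_ptr<arrow::Buffer> buffer;
+
+   // Allocate 4096 bytes from the given pool rather than the default one
+   if (!arrow::AllocateBuffer(pool, 4096, &buffer).ok()) {
+      // ... handle allocation error
+   }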
+
+Memory pools are used for large long-lived data such as array buffers.
+Other data, such as small C++ objects and temporary workspaces, usually
+goes through the regular C++ allocators.
+
+Default Memory Pool
+-------------------
+
+Depending on how Arrow was compiled, the default memory pool may use the
+standard C ``malloc`` allocator, or a `jemalloc <http://jemalloc.net/>`_ heap.
+
+STL Integration
+---------------
+
+If you wish to use an Arrow memory pool to allocate the data of STL containers,
+you can do so using the :class:`arrow::stl_allocator` wrapper.
+
+Conversely, you can also use an STL allocator to allocate Arrow memory,
+using the :class:`arrow::STLMemoryPool` class. However, this may be less
+performant, as STL allocators don't provide a resizing operation.
diff --git a/docs/source/cpp/overview.rst b/docs/source/cpp/overview.rst
new file mode 100644
index 0000000000000..490efc1b7a2c1
--- /dev/null
+++ b/docs/source/cpp/overview.rst
@@ -0,0 +1,93 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. default-domain:: cpp
+.. highlight:: cpp
+
+High-Level Overview
+===================
+
+The Arrow C++ library is composed of different parts, each of which serves
+a specific purpose.
+
+The physical layer
+------------------
+
+**Memory management** abstractions provide a uniform API over memory that
+may be allocated through various means, such as heap allocation, the memory
+mapping of a file or a static memory area. In particular, the **buffer**
+abstraction represents a contiguous area of physical data.
+
+The one-dimensional layer
+-------------------------
+
+**Data types** govern the *logical* interpretation of *physical* data.
+Many operations in Arrow are parameterized, at compile-time or at runtime,
+by a data type.
+
+**Arrays** assemble one or several buffers with a data type, allowing them
+to be viewed as a logical contiguous sequence of values (possibly nested).
+
+**Chunked arrays** are a generalization of arrays, combining several same-type
+arrays into a longer logical sequence of values.
+
+The two-dimensional layer
+-------------------------
+
+**Schemas** describe a logical collection of several pieces of data,
+each with a distinct name and type, and optional metadata.
+
+**Columns** are like chunked arrays, but with optional metadata.
+
+**Tables** are collections of columns in accordance with a schema. They are
+the most capable dataset-providing abstraction in Arrow.
+
+**Record batches** are collections of contiguous arrays, described
+by a schema. They allow incremental construction or serialization of tables.
+
+The compute layer
+-----------------
+
+**Datums** are flexible dataset references, able to hold, for example, an
+array or table reference.
+
+**Kernels** are specialized computation functions running in a loop over a
+given set of datums representing input and output parameters to the functions.
+
+The IO layer
+------------
+
+**Streams** allow untyped sequential or seekable access over external data
+of various kinds (for example compressed or memory-mapped).
+
+The Inter-Process Communication (IPC) layer
+-------------------------------------------
+
+A **messaging format** allows interchange of Arrow data between processes, using
+as few copies as possible.
+
+The file formats layer
+----------------------
+
+Reading and writing Arrow data from/to various file formats is possible, for
+example **Parquet**, **CSV**, **ORC** or the Arrow-specific **Feather** format.
+
+The devices layer
+-----------------
+
+Basic **CUDA** integration is provided, allowing Arrow data backed by
+GPU-allocated memory to be described.
diff --git a/docs/source/cpp/tables.rst b/docs/source/cpp/tables.rst
new file mode 100644
index 0000000000000..d42f0c6c4f53e
--- /dev/null
+++ b/docs/source/cpp/tables.rst
@@ -0,0 +1,87 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. default-domain:: cpp
+.. highlight:: cpp
+
+========================
+Two-dimensional Datasets
+========================
+
+While arrays and chunked arrays represent a one-dimensional sequence of
+homogeneous values, data often comes in the form of two-dimensional sets of
+heterogeneous data (such as database tables, CSV files...). Arrow provides
+several abstractions to handle such data conveniently and efficiently.
+
+Fields
+======
+
+Fields are used to denote the particular columns of a table (and also
+the particular members of a nested data type such as :class:`arrow::StructType`).
+A field, i.e. an instance of :class:`arrow::Field`, holds together a data
+type, a field name and some optional metadata.
+
+The recommended way to create a field is to call the :func:`arrow::field`
+factory function.
+
+Schemas
+=======
+
+A schema describes the overall structure of a two-dimensional dataset such
+as a table. It holds a sequence of fields together with some optional
+schema-wide metadata (in addition to per-field metadata).
+The recommended way to create a schema is to call one of the
+:func:`arrow::schema` factory function overloads::
+
+   // Create a schema describing datasets with two columns:
+   // an int32 column "A" and a utf8-encoded string column "B"
+   std::shared_ptr<arrow::Field> field_a, field_b;
+   std::shared_ptr<arrow::Schema> schema;
+
+   field_a = arrow::field("A", arrow::int32());
+   field_b = arrow::field("B", arrow::utf8());
+   schema = arrow::schema({field_a, field_b});
+
+Columns
+=======
+
+A :class:`arrow::Column` is a chunked array tied together with a field.
+The field describes the column's name (for lookup in a larger dataset)
+and its metadata.
+
+Tables
+======
+
+A :class:`arrow::Table` is a two-dimensional dataset of a number of columns,
+together with a schema. The columns' names and types must match the schema.
+Also, each column must have the same logical length in number of elements
+(although each column can be chunked in a different way).
+
+Record Batches
+==============
+
+A :class:`arrow::RecordBatch` is a two-dimensional dataset of a number of
+contiguous arrays, each the same length. Like a table, a record batch also
+has a schema which must match its arrays' datatypes.
+
+Record batches are a convenient unit of work for various serialization
+and computation functions, possibly incremental.
+
+A table can be streamed as an arbitrary number of record batches using
+a :class:`arrow::TableBatchReader`. Conversely, a logical sequence of
+record batches can be assembled to form a table using one of the
+:func:`arrow::Table::FromRecordBatches` factory function overloads.
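+
+Given a schema and matching columns, assembling a table is then a one-liner.
+A minimal sketch (``array_a`` and ``array_b`` are placeholders for pre-built
+arrays of the respective column types)::
+
+   // Combine the schema built above with one array per column;
+   // all arrays must have the same length
+   std::shared_ptr<arrow::Table> table =
+      arrow::Table::Make(schema, {array_a, array_b});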
diff --git a/format/Arrow.graffle b/docs/source/format/Arrow.graffle
similarity index 100%
rename from format/Arrow.graffle
rename to docs/source/format/Arrow.graffle
diff --git a/format/Arrow.png b/docs/source/format/Arrow.png
similarity index 100%
rename from format/Arrow.png
rename to docs/source/format/Arrow.png
diff --git a/docs/source/format/Guidelines.rst b/docs/source/format/Guidelines.rst
new file mode 100644
index 0000000000000..5b032206c2611
--- /dev/null
+++ b/docs/source/format/Guidelines.rst
@@ -0,0 +1,43 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+Implementation guidelines
+=========================
+
+An execution engine (or framework, or UDF executor, or storage engine, etc.)
+can implement only a subset of the Arrow spec and/or extend it given the
+following constraints:
+
+Implementing a subset of the spec
+---------------------------------
+
+If only producing (and not consuming) Arrow vectors
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Any subset of the vector spec and the corresponding metadata can be
+implemented.
+
+If consuming and producing vectors
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+There is a minimal subset of vectors to be supported.
+Production of a subset of vectors and their corresponding metadata is always
+fine.
+Consumption of vectors should at least convert the unsupported input vectors
+to the supported subset (for example timestamp.millis to timestamp.micros or
+int32 to int64).
+
+Extensibility
+-------------
+
+An execution engine implementor can also extend their memory representation
+with their own vectors internally as long as they are never exposed. Before
+sending data to another system expecting Arrow data, these custom vectors
+should be converted to a type that exists in the Arrow spec.
+An example of this is operating on compressed data.
+These custom vectors are not exchanged externally and there is no support
+for custom metadata.
diff --git a/docs/source/format/IPC.rst b/docs/source/format/IPC.rst
new file mode 100644
index 0000000000000..62a1237436ae3
--- /dev/null
+++ b/docs/source/format/IPC.rst
@@ -0,0 +1,261 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+Interprocess messaging / communication (IPC)
+============================================
+
+Encapsulated message format
+---------------------------
+
+Data components in the stream and file formats are represented as encapsulated
+*messages* consisting of:
+
+* A length prefix indicating the metadata size
+* The message metadata as a `Flatbuffer`_
+* Padding bytes to an 8-byte boundary
+* The message body, which must be a multiple of 8 bytes
+
+Schematically, we have: ::
+
+    <metadata_size: int32>
+    <metadata_flatbuffer: bytes>
+    <padding>
+    <message body>
+
+The complete serialized message must be a multiple of 8 bytes so that messages
+can be relocated between streams. Otherwise the amount of padding between the
+metadata and the message body could be non-deterministic.
+
+The ``metadata_size`` includes the size of the flatbuffer plus padding. The
+``Message`` flatbuffer includes a version number, the particular message (as a
+flatbuffer union), and the size of the message body: ::
+
+    table Message {
+      version: org.apache.arrow.flatbuf.MetadataVersion;
+      header: MessageHeader;
+      bodyLength: long;
+    }
+
+Currently, we support 4 types of messages:
+
+* Schema
+* RecordBatch
+* DictionaryBatch
+* Tensor
+
+Streaming format
+----------------
+
+We provide a streaming format for record batches. It is presented as a sequence
+of encapsulated messages, each of which follows the format above. The schema
+comes first in the stream, and it is the same for all of the record batches
+that follow. If any fields in the schema are dictionary-encoded, one or more
+``DictionaryBatch`` messages will be included. ``DictionaryBatch`` and
+``RecordBatch`` messages may be interleaved, but before any dictionary key is used
+in a ``RecordBatch`` it should be defined in a ``DictionaryBatch``. ::
+
+    <SCHEMA>
+    <DICTIONARY 0>
+    ...
+    <DICTIONARY k - 1>
+    <RECORD BATCH 0>
+    ...
+    <DICTIONARY x DELTA>
+    ...
+    <DICTIONARY y DELTA>
+    ...
+    <RECORD BATCH n - 1>
+    <EOS [optional]: int32>
+
+When a stream reader implementation is reading a stream, after each message, it
+may read the next 4 bytes to know how large the message metadata that follows
+is. Once the message flatbuffer is read, it can then read the message body.
+
+The stream writer can signal end-of-stream (EOS) either by writing a 0 length
+as an ``int32`` or simply closing the stream interface.
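+
+To make the framing concrete, here is a minimal reading-loop sketch. This is
+not part of the Arrow API: ``ReadExact`` stands in for any helper that fills a
+buffer from the underlying stream, and flatbuffer parsing is elided::
+
+    // Read the 4-byte metadata length prefix
+    int32_t metadata_size = 0;
+    ReadExact(reinterpret_cast<uint8_t*>(&metadata_size), sizeof(metadata_size));
+
+    if (metadata_size == 0) {
+      // A zero length prefix is the optional end-of-stream (EOS) marker
+    } else {
+      // Read the Message flatbuffer (including padding) ...
+      std::vector<uint8_t> metadata(metadata_size);
+      ReadExact(metadata.data(), metadata.size());
+      // ... parse it to obtain bodyLength, then read the message body:
+      // std::vector<uint8_t> body(body_length);
+      // ReadExact(body.data(), body.size());
+    }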
+
+File format
+-----------
+
+We define a "file format" supporting random access in a very similar format to
+the streaming format. The file starts and ends with a magic string ``ARROW1``
+(plus padding). What follows in the file is identical to the stream format. At
+the end of the file, we write a *footer* containing a redundant copy of the
+schema (which is a part of the streaming format) plus memory offsets and sizes
+for each of the data blocks in the file. This enables random access to any
+record batch in the file. See ``File.fbs`` for the precise details of the file
+footer.
+
+Schematically we have: ::
+
+    <magic number "ARROW1">
+    <empty padding bytes [to 8 byte boundary]>
+    <STREAMING FORMAT>
+    <FOOTER>
+    <FOOTER SIZE: int32>
+    <magic number "ARROW1">
+
+In the file format, there is no requirement that dictionary keys should be
+defined in a ``DictionaryBatch`` before they are used in a ``RecordBatch``, as long
+as the keys are defined somewhere in the file.
+
+RecordBatch body structure
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``RecordBatch`` metadata contains a depth-first (pre-order) flattened set of
+field metadata and physical memory buffers (some comments from ``Message.fbs``
+have been shortened / removed): ::
+
+    table RecordBatch {
+      length: long;
+      nodes: [FieldNode];
+      buffers: [Buffer];
+    }
+
+    struct FieldNode {
+      length: long;
+      null_count: long;
+    }
+
+    struct Buffer {
+      /// The relative offset into the shared memory page where the bytes for this
+      /// buffer starts
+      offset: long;
+
+      /// The absolute length (in bytes) of the memory buffer. The memory is found
+      /// from offset (inclusive) to offset + length (non-inclusive).
+      length: long;
+    }
+
+In the context of a file, the ``page`` is not used, and the ``Buffer`` offsets use
+as a frame of reference the start of the message body. So, while in a general
+IPC setting these offsets may be anyplace in one or more shared memory regions,
+in the file format the offsets start from 0.
+
+The location of a record batch and the size of the metadata block as well as
+the body of buffers is stored in the file footer: ::
+
+    struct Block {
+      offset: long;
+      metaDataLength: int;
+      bodyLength: long;
+    }
+
+The ``metaDataLength`` here includes the metadata length prefix, serialized
+metadata, and any additional padding bytes, and by construction must be a
+multiple of 8 bytes.
+
+Some notes about this:
+
+* The ``Block`` offset indicates the starting byte of the record batch.
+* The metadata length includes the flatbuffer size, the record batch metadata
+  flatbuffer, and any padding bytes
+
+Dictionary Batches
+~~~~~~~~~~~~~~~~~~
+
+Dictionaries are written in the stream and file formats as a sequence of record
+batches, each having a single field. The complete semantic schema for a
+sequence of record batches, therefore, consists of the schema along with all of
+the dictionaries. The dictionary types are found in the schema, so it is
+necessary to read the schema to first determine the dictionary types so that
+the dictionaries can be properly interpreted. ::
+
+    table DictionaryBatch {
+      id: long;
+      data: RecordBatch;
+      isDelta: boolean = false;
+    }
+
+The dictionary ``id`` in the message metadata can be referenced one or more times
+in the schema, so that dictionaries can even be used for multiple fields. See
+the :doc:`Layout` document for more about the semantics of
+dictionary-encoded data.
+
+The dictionary ``isDelta`` flag allows dictionary batches to be modified
+mid-stream. A dictionary batch with ``isDelta`` set indicates that its vector
+should be concatenated with those of any previous batches with the same ``id``. A
+stream which encodes one column, the list of strings
+``["A", "B", "C", "B", "D", "C", "E", "A"]``, with a delta dictionary batch could
+take the form: ::
+
+    <SCHEMA>
+    <DICTIONARY 0>
+    (0) "A"
+    (1) "B"
+    (2) "C"
+    <RECORD BATCH 0>
+    0
+    1
+    2
+    1
+    <DICTIONARY 0 DELTA>
+    (3) "D"
+    (4) "E"
+    <RECORD BATCH 1>
+    3
+    2
+    4
+    0
+    EOS
+
+Tensor (Multi-dimensional Array) Message Format
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``Tensor`` message type provides a way to write a multidimensional array of
+fixed-size values (such as a NumPy ndarray) using Arrow's shared memory
+tools. Arrow implementations in general are not required to implement this data
+format, though we provide a reference implementation in C++.
+
+When writing a standalone encapsulated tensor message, we use the format as
+indicated above, but additionally align the starting offset of the metadata as
+well as the starting offset of the tensor body (if writing to a shared memory
+region) to be multiples of 64 bytes: ::
+
+    <PADDING>
+    <metadata size: int32>
+    <metadata>
+    <tensor body>
+
+SparseTensor Message Format
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``SparseTensor`` message type provides another way to write a
+multidimensional array of fixed-size values using Arrow's shared memory tools
+in addition to ``Tensor``. ``SparseTensor`` is designed specifically for tensors
+whose elements are mostly zeros. As with ``Tensor``, Arrow implementations in
+general are not required to implement this data format.
+
+When writing a standalone encapsulated sparse tensor message, we use the format as
+indicated above, but additionally align the starting offset of the metadata as
+well as the starting offsets of the sparse index and the sparse tensor body
+(if writing to a shared memory region) to be multiples of 64 bytes: ::
+
+    <PADDING>
+    <metadata size: int32>
+    <metadata>
+    <sparse index>
+    <sparse tensor body>
+
+The contents of the sparse tensor index depend on what kind of sparse format
+is used.
+
+.. _Flatbuffer: https://github.com/google/flatbuffers
diff --git a/docs/source/format/Layout.rst b/docs/source/format/Layout.rst
new file mode 100644
index 0000000000000..efe0594803a40
--- /dev/null
+++ b/docs/source/format/Layout.rst
@@ -0,0 +1,668 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+Physical memory layout
+======================
+
+Definitions / Terminology
+-------------------------
+
+Since different projects have used different words to describe various
+concepts, here is a small glossary to help disambiguate.
+
+* Array: a sequence of values with known length all having the same type.
+* Slot or array slot: a single logical value in an array of some particular
+  data type
+* Contiguous memory region: a sequential virtual address space with a given
+  length. Any byte can be reached via a single pointer offset less than the
+  region's length.
+* Contiguous memory buffer: A contiguous memory region that stores
+  a multi-value component of an Array. Sometimes referred to as just "buffer".
+* Primitive type: a data type that occupies a fixed-size memory slot specified
+  in bit width or byte width
+* Nested or parametric type: a data type whose full structure depends on one or
+  more other child relative types. Two fully-specified nested types are equal
+  if and only if their child types are equal. For example, ``List<U>`` is distinct
+  from ``List<V>`` iff U and V are different relative types.
+* Relative type or simply type (unqualified): either a specific primitive type
+  or a fully-specified nested type. When we say slot we mean a relative type
+  value, not necessarily any physical storage region.
+* Logical type: A data type that is implemented using some relative (physical)
+  type. For example, Decimal values are stored as 16 bytes in a fixed byte
+  size array. Similarly, strings can be stored as ``List<1-byte>``.
+* Parent and child arrays: names to express relationships between physical
+  value arrays in a nested type structure. For example, a ``List<T>``-type
+  parent array has a T-type array as its child (see more on lists below).
+* Leaf node or leaf: A primitive value array that may or may not be a child
+  array of some array with a nested type.
+
+Requirements, goals, and non-goals
+----------------------------------
+
+Base requirements
+
+* A physical memory layout enabling zero-deserialization data interchange
+  amongst a variety of systems handling flat and nested columnar data,
+  including such systems as Spark, Drill, Impala, Kudu, Ibis, ODBC protocols,
+  and proprietary systems that utilize the open source components.
+* All array slots are accessible in constant time, with complexity growing
+  linearly in the nesting level
+* Capable of representing fully-materialized and decoded / decompressed
+  `Parquet`_ data
+* It is required to have all the contiguous memory buffers in an IPC payload
+  aligned at 8-byte boundaries. In other words, each buffer must start at
+  an aligned 8-byte offset. Additionally, each buffer should be padded to a
+  multiple of 8 bytes.
+* For performance reasons it is **preferred/recommended** to align buffers to
+  a 64-byte boundary and pad to a multiple of 64 bytes, but this is not
+  absolutely necessary. The rationale is discussed in more detail below.
+* Any relative type can have null slots
+* Arrays are immutable once created. Implementations can provide APIs to
+  mutate an array, but applying mutations will require a new array data
+  structure to be built.
+* Arrays are relocatable (e.g. for RPC/transient storage) without pointer
+  swizzling. Another way of putting this is that contiguous memory regions can
+  be migrated to a different address space (e.g. via a memcpy-type of
+  operation) without altering their contents.
+
+Goals (for this document)
+-------------------------
+
+* To describe relative types (physical value types and a preliminary set of
+  nested types) sufficient for an unambiguous implementation
+* Memory layout and random access patterns for each relative type
+* Null value representation
+
+Non-goals (for this document)
+-----------------------------
+
+* To enumerate or specify logical types that can be implemented as primitive
+  (fixed-width) value types. For example: signed and unsigned integers,
+  floating point numbers, boolean, exact decimals, date and time types,
+  CHAR(K), VARCHAR(K), etc.
+* To specify standardized metadata or a data layout for RPC or transient file
+  storage.
+* To define a selection or masking vector construct
+* Implementation-specific details
+* Details of a user or developer C/C++/Java API.
+* Any "table" structure composed of named arrays each having their own type or
+  any other structure that composes arrays.
+* Any memory management or reference counting subsystem
+* To enumerate or specify types of encodings or compression support
+
+Byte Order (`Endianness`_)
+---------------------------
+
+The Arrow format is little endian by default.
+The Schema metadata has an endianness field indicating the endianness of
+RecordBatches. Typically this is the endianness of the system where the
+RecordBatch was generated.
+The main use case is exchanging RecordBatches between systems with the same
+endianness. Initially, an error will be returned when trying to read a Schema
+with an endianness that does not match the underlying system. The reference
+implementation is focused on little endian and provides tests for it.
+Eventually we may provide automatic conversion via byte swapping.
+
+Alignment and Padding
+---------------------
+
+As noted above, all buffers must be aligned in memory at 8-byte boundaries and
+padded to a length that is a multiple of 8 bytes. The alignment requirement
+follows best practices for optimized memory access:
+
+* Elements in numeric arrays will be guaranteed to be retrieved via aligned
+  access.
+* On some architectures alignment can help limit partially used cache lines.
+
+The recommendation for 64 byte alignment comes from the `Intel performance
+guide`_ that recommends alignment of memory to match SIMD register width.
+The specific padding length was chosen because it matches the largest known
+SIMD instruction registers available as of April 2016 (Intel AVX-512).
+
+The recommended padding of 64 bytes allows for using `SIMD`_ instructions
+consistently in loops without additional conditional checks.
+This should allow for simpler, efficient and CPU cache-friendly code. In other
+words, we can load the entire 64-byte buffer into a 512-bit wide SIMD register
+and get data-level parallelism on all the columnar values packed into the
+64-byte buffer. Guaranteed padding can also allow certain compilers
+to generate more optimized code directly (e.g. one can safely use Intel's
+``-qopt-assume-safe-padding``).
+
+Unless otherwise noted, padded bytes do not need to have a specific value.
+
+Array lengths
+-------------
+
+Array lengths are represented in the Arrow metadata as a 64-bit signed
+integer. However, an implementation of Arrow is considered valid even if it
+only supports lengths up to the maximum 32-bit signed integer. If using
+Arrow in a multi-language environment, we recommend limiting lengths to
+2 :sup:`31` - 1 elements or less. Larger data sets can be represented using
+multiple array chunks.
+
+Null count
+----------
+
+The number of null value slots is a property of the physical array and
+considered part of the data structure. The null count is represented in the
+Arrow metadata as a 64-bit signed integer, as it may be as large as the array
+length.
+
+Null bitmaps
+------------
+
+Any relative type can have null value slots, whether primitive or nested type.
+
+An array with nulls must have a contiguous memory buffer, known as the null
+(or validity) bitmap, whose length is a multiple of 8 bytes (64 bytes
+recommended) and large enough to have at least 1 bit for each array slot.
+
+Whether any array slot is valid (non-null) is encoded in the respective bits
+of this bitmap. A 1 (set bit) for index ``j`` indicates that the value is not
+null, while a 0 (bit not set) indicates that it is null. Bitmaps are to be
+initialized to be all unset at allocation time (this includes padding): ::
+
+    is_valid[j] -> bitmap[j / 8] & (1 << (j % 8))
+
+We use `least-significant bit (LSB) numbering`_ (also known as
+bit-endianness). This means that within a group of 8 bits, we read
+right-to-left: ::
+
+    values = [0, 1, null, 2, null, 3]
+
+    bitmap
+    j mod 8   7  6  5  4  3  2  1  0
+              0  0  1  0  1  0  1  1
+
+Arrays having a 0 null count may choose to not allocate the null
+bitmap. Implementations may choose to always allocate one anyway as a matter
+of convenience, but this should be noted when memory is being shared.
+
+Nested type arrays have their own null bitmap and null count regardless of
+the null count and null bits of their child arrays.
+
+Primitive value arrays
+----------------------
+
+A primitive value array represents a fixed-length array of values each having
+the same physical slot width typically measured in bytes, though the spec also
+provides for bit-packed types (e.g. boolean values encoded in bits).
+
+Internally, the array contains a contiguous memory buffer whose total size is
+equal to the slot width multiplied by the array length. For bit-packed types,
+the size is rounded up to the nearest byte.
+
+The associated null bitmap is contiguously allocated (as described above) but
+does not need to be adjacent in memory to the values buffer.
+
+Example Layout: Int32 Array
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For example a primitive array of int32s: ::
+
+    [1, null, 2, 4, 8]
+
+Would look like: ::
+
+    * Length: 5, Null count: 1
+    * Null bitmap buffer:
+
+      |Byte 0 (validity bitmap) | Bytes 1-63            |
+      |-------------------------|-----------------------|
+      | 00011101                | 0 (padding)           |
+
+    * Value Buffer:
+
+      |Bytes 0-3   | Bytes 4-7   | Bytes 8-11  | Bytes 12-15 | Bytes 16-19 | Bytes 20-63 |
+      |------------|-------------|-------------|-------------|-------------|-------------|
+      | 1          | unspecified | 2           | 4           | 8           | unspecified |
+
+Example Layout: Non-null int32 Array
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+``[1, 2, 3, 4, 8]`` has two possible layouts: ::
+
+    * Length: 5, Null count: 0
+    * Null bitmap buffer:
+
+      | Byte 0 (validity bitmap) | Bytes 1-63            |
+      |--------------------------|-----------------------|
+      | 00011111                 | 0 (padding)           |
+
+    * Value Buffer:
+
+      |Bytes 0-3   | Bytes 4-7   | Bytes 8-11  | Bytes 12-15 | Bytes 16-19 | Bytes 20-63 |
+      |------------|-------------|-------------|-------------|-------------|-------------|
+      | 1          | 2           | 3           | 4           | 8           | unspecified |
+
+or with the bitmap elided: ::
+
+    * Length 5, Null count: 0
+    * Null bitmap buffer: Not required
+    * Value Buffer:
+
+      |Bytes 0-3   | Bytes 4-7   | Bytes 8-11  | Bytes 12-15 | Bytes 16-19 | Bytes 20-63 |
+      |------------|-------------|-------------|-------------|-------------|-------------|
+      | 1          | 2           | 3           | 4           | 8           | unspecified |
+
+List type
+---------
+
+List is a nested type in which each array slot contains a variable-size
+sequence of values all having the same relative type (heterogeneity can be
+achieved through unions, described later).
+
+A list type is specified like ``List<T>``, where ``T`` is any relative type
+(primitive or nested).
+
+A list-array is represented by the combination of the following:
+
+* A values array, a child array of type T. T may also be a nested type.
+* An offsets buffer containing 32-bit signed integers with length equal to the
+  length of the top-level array plus one. Note that this limits the size of
+  the values array to 2 :sup:`31` - 1.
+
+The offsets array encodes a start position in the values array, and the length
+of the value in each slot is computed using the first difference with the next
+element in the offsets array. For example, the position and length of slot j
+is computed as: ::
+
+    slot_position = offsets[j]
+    slot_length = offsets[j + 1] - offsets[j]  // (for 0 <= j < length)
+
+The first value in the offsets array is 0, and the last element is the length
+of the values array.
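+
+To make the offsets arithmetic concrete, here is a small Python sketch (plain
+Python lists rather than any particular Arrow API) that recovers each slot of
+a list-array from its offsets, values, and validity bitmap; the data is that
+of the ``List<Char>`` example in the next section::
+
+    # [['j', 'o', 'e'], null, ['m', 'a', 'r', 'k'], []]
+    values = "joemark"
+    offsets = [0, 3, 3, 7, 7]   # length of top-level array (4) plus one
+    validity = [1, 0, 1, 1]
+
+    for j in range(len(offsets) - 1):
+        if not validity[j]:
+            print(None)
+        else:
+            # Slot j spans [offsets[j], offsets[j + 1]) in the values array
+            print(list(values[offsets[j]:offsets[j + 1]]))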
+
+Example Layout: ``List<Char>`` Array
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Let's consider an example, the type ``List<Char>``, where Char is a 1-byte
+logical type.
+
+An array of length 4 with respective values: ::
+
+    [['j', 'o', 'e'], null, ['m', 'a', 'r', 'k'], []]
+
+will have the following representation: ::
+
+    * Length: 4, Null count: 1
+    * Null bitmap buffer:
+
+      | Byte 0 (validity bitmap) | Bytes 1-63            |
+      |--------------------------|-----------------------|
+      | 00001101                 | 0 (padding)           |
+
+    * Offsets buffer (int32)
+
+      | Bytes 0-3  | Bytes 4-7   | Bytes 8-11  | Bytes 12-15 | Bytes 16-19 | Bytes 20-63 |
+      |------------|-------------|-------------|-------------|-------------|-------------|
+      | 0          | 3           | 3           | 7           | 7           | unspecified |
+
+    * Values array (char array):
+      * Length: 7, Null count: 0
+      * Null bitmap buffer: Not required
+
+        | Bytes 0-6  | Bytes 7-63  |
+        |------------|-------------|
+        | joemark    | unspecified |
+
+Example Layout: ``List<List<byte>>``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+``[[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], [[9, 10]]]``
+
+will be represented as follows: ::
+
+    * Length 3
+    * Null count: 0
+    * Null bitmap buffer: Not required
+    * Offsets buffer (int32)
+
+      | Bytes 0-3  | Bytes 4-7  | Bytes 8-11 | Bytes 12-15 | Bytes 16-63 |
+      |------------|------------|------------|-------------|-------------|
+      | 0          | 2          | 5          | 6           | unspecified |
+
+    * Values array (`List<byte>`)
+      * Length: 6, Null count: 1
+      * Null bitmap buffer:
+
+        | Byte 0 (validity bitmap) | Bytes 1-63  |
+        |--------------------------|-------------|
+        | 00110111                 | 0 (padding) |
+
+      * Offsets buffer (int32)
+
+        | Bytes 0-27           | Bytes 28-63 |
+        |----------------------|-------------|
+        | 0, 2, 4, 7, 7, 8, 10 | unspecified |
+
+      * Values array (bytes):
+        * Length: 10, Null count: 0
+        * Null bitmap buffer: Not required
+
+          | Bytes 0-9                     | Bytes 10-63 |
+          |-------------------------------|-------------|
+          | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 | unspecified |
+
+Struct type
+-----------
+
+A struct is a nested type parameterized by an ordered sequence of relative
+types (which can all be distinct), called its fields.
+
+Typically the fields have names, but the names and their types are part of
+the type metadata, not the physical memory layout.
+
+A struct array does not have any additional allocated physical storage for
+its values. A struct array must still have an allocated null bitmap, if it
+has one or more null values.
+
+Physically, a struct type has one child array for each field. The child
+arrays are independent and need not be adjacent to each other in memory.
+
+For example, the struct (field names shown here as strings for illustration
+purposes)::
+
+    Struct <
+      name: String (= List<char>),
+      age: Int32
+    >
+
+has two child arrays, one ``List<char>`` array (layout as above) and one
+4-byte primitive value array having ``Int32`` logical type.
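+
+As an aside, the Python bindings can build such a struct array directly from
+its child arrays, which makes the parent/child relationship concrete (a
+sketch; close to, but not byte-identical with, the example in the next
+section, since the null struct slot is constructed differently here)::
+
+    import pyarrow as pa
+
+    names = pa.array(['joe', None, None, 'mark'], type=pa.string())
+    ages = pa.array([1, 2, None, 4], type=pa.int32())
+
+    # One child array per field; the struct itself stores no values
+    struct_arr = pa.StructArray.from_arrays([names, ages], ['name', 'age'])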
+
+Example Layout: ``Struct<List<char>, Int32>``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The layout for ``[{'joe', 1}, {null, 2}, null, {'mark', 4}]`` would be: ::
+
+    * Length: 4, Null count: 1
+    * Null bitmap buffer:
+
+      |Byte 0 (validity bitmap) | Bytes 1-63            |
+      |-------------------------|-----------------------|
+      | 00001011                | 0 (padding)           |
+
+    * Children arrays:
+      * field-0 array (`List<char>`):
+        * Length: 4, Null count: 2
+        * Null bitmap buffer:
+
+          | Byte 0 (validity bitmap) | Bytes 1-63            |
+          |--------------------------|-----------------------|
+          | 00001001                 | 0 (padding)           |
+
+        * Offsets buffer:
+
+          | Bytes 0-19     |
+          |----------------|
+          | 0, 3, 3, 3, 7  |
+
+        * Values array:
+          * Length: 7, Null count: 0
+          * Null bitmap buffer: Not required
+
+          * Value buffer:
+
+            | Bytes 0-6      |
+            |----------------|
+            | joemark        |
+
+      * field-1 array (int32 array):
+        * Length: 4, Null count: 1
+        * Null bitmap buffer:
+
+          | Byte 0 (validity bitmap) | Bytes 1-63            |
+          |--------------------------|-----------------------|
+          | 00001011                 | 0 (padding)           |
+
+        * Value Buffer:
+
+          |Bytes 0-3   | Bytes 4-7   | Bytes 8-11  | Bytes 12-15 | Bytes 16-63 |
+          |------------|-------------|-------------|-------------|-------------|
+          | 1          | 2           | unspecified | 4           | unspecified |
+
+While a struct does not have physical storage for each of its semantic slots
+(i.e. each scalar C-like struct), an entire struct slot can be set to null via
+the null bitmap. Any of the child field arrays can have null values according
+to their respective independent null bitmaps. This implies that for a
+particular struct slot the null bitmap for the struct array might indicate a
+null slot when one or more of its child arrays has a non-null value in their
+corresponding slot. When reading the struct array the parent null bitmap is
+authoritative. This is illustrated in the example above: the child arrays
+have valid entries for the null struct but are 'hidden' from the consumer by
+the parent array's null bitmap. However, when treated independently, the
+corresponding values of the child arrays will be non-null.
+
+Dense union type
+----------------
+
+A dense union is semantically similar to a struct, and contains an ordered
+sequence of relative types. While a struct contains multiple arrays, a union
+is semantically a single array in which each slot can have a different type.
+
+The union types may be named, but like structs this will be a matter of the
+metadata and will not affect the physical memory layout.
+
+We define two distinct union types that are optimized for different use
+cases. The first, the dense union, represents a mixed-type array with 5 bytes
+of overhead for each value. Its physical layout is as follows (a concrete
+construction is sketched after this list):
+
+* One child array for each relative type
+* Types buffer: A buffer of 8-bit signed integers, enumerated from 0
+  corresponding to each type. A union with more than 127 possible types can
+  be modeled as a union of unions.
+* Offsets buffer: A buffer of signed int32 values indicating the relative
+  offset into the respective child array for the type in a given slot. The
+  respective offsets for each child value array must be in order / increasing.
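+
+To ground this layout, here is a minimal sketch using the Python bindings,
+whose dense union constructor takes exactly the pieces enumerated above (the
+example data is arbitrary)::
+
+    import pyarrow as pa
+
+    # One child array per relative type
+    floats = pa.array([1.2, 3.4], type=pa.float32())
+    ints = pa.array([5], type=pa.int32())
+
+    # Three slots: {f=1.2}, {i=5}, {f=3.4}
+    types = pa.array([0, 1, 0], type=pa.int8())      # types buffer
+    offsets = pa.array([0, 0, 1], type=pa.int32())   # offsets buffer
+
+    union_arr = pa.UnionArray.from_dense(types, offsets, [floats, ints])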
+
+Critically, the dense union allows for minimal overhead in the ubiquitous
+union-of-structs with non-overlapping-fields use case
+(``Union<s1: Struct1, s2: Struct2, s3: Struct3, ...>``)
+
+Example Layout: Dense union
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+An example layout for logical union of: ``Union<f: float, i: int32>`` having
+the values: ``[{f=1.2}, null, {f=3.4}, {i=5}]``::
+
+    * Length: 4, Null count: 1
+    * Null bitmap buffer:
+
+      |Byte 0 (validity bitmap) | Bytes 1-63            |
+      |-------------------------|-----------------------|
+      |00001101                 | 0 (padding)           |
+
+    * Types buffer:
+
+      |Byte 0   | Byte 1      | Byte 2   | Byte 3   | Bytes 4-63  |
+      |---------|-------------|----------|----------|-------------|
+      | 0       | unspecified | 0        | 1        | unspecified |
+
+    * Offset buffer:
+
+      |Byte 0-3 | Byte 4-7    | Byte 8-11 | Byte 12-15 | Bytes 16-63 |
+      |---------|-------------|-----------|------------|-------------|
+      | 0       | unspecified | 1         | 0          | unspecified |
+
+    * Children arrays:
+      * Field-0 array (f: float):
+        * Length: 2, nulls: 0
+        * Null bitmap buffer: Not required
+
+        * Value Buffer:
+
+          | Bytes 0-7 | Bytes 8-63  |
+          |-----------|-------------|
+          | 1.2, 3.4  | unspecified |
+
+      * Field-1 array (i: int32):
+        * Length: 1, nulls: 0
+        * Null bitmap buffer: Not required
+
+        * Value Buffer:
+
+          | Bytes 0-3 | Bytes 4-63  |
+          |-----------|-------------|
+          | 5         | unspecified |
+
+Sparse union type
+-----------------
+
+A sparse union has the same structure as a dense union, with the omission of
+the offsets array. In this case, the child arrays are each equal in length to
+the length of the union.
+
+While a sparse union may use significantly more space compared with a dense
+union, it has some advantages that may be desirable in certain use cases:
+
+* A sparse union is more amenable to vectorized expression evaluation in some
+  use cases.
+* Equal-length arrays can be interpreted as a union by only defining the
+  types array (as the sketch below demonstrates).
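+
+The second advantage is illustrated by the following Python sketch, which
+reinterprets two equal-length arrays as a sparse union by supplying only a
+types array (the data is arbitrary)::
+
+    import pyarrow as pa
+
+    # Equal-length children; slots not selected by the types array are ignored
+    ints = pa.array([5, 0, 0], type=pa.int32())
+    floats = pa.array([0.0, 1.2, 3.4], type=pa.float32())
+
+    types = pa.array([0, 1, 1], type=pa.int8())
+    union_arr = pa.UnionArray.from_sparse(types, [ints, floats])
+    # Logical contents: [5, 1.2, 3.4]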
+
+Example layout: ``SparseUnion<u0: Int32, u1: Float, u2: List<char>>``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The union array: ::
+
+    [{u0=5}, {u1=1.2}, {u2='joe'}, {u1=3.4}, {u0=4}, {u2='mark'}]
+
+will have the following layout: ::
+
+    * Length: 6, Null count: 0
+    * Null bitmap buffer: Not required
+
+    * Types buffer:
+
+      | Byte 0     | Byte 1      | Byte 2      | Byte 3      | Byte 4      | Byte 5       | Bytes 6-63            |
+      |------------|-------------|-------------|-------------|-------------|--------------|-----------------------|
+      | 0          | 1           | 2           | 1           | 0           | 2            | unspecified (padding) |
+
+    * Children arrays:
+
+      * u0 (Int32):
+        * Length: 6, Null count: 4
+        * Null bitmap buffer:
+
+          |Byte 0 (validity bitmap) | Bytes 1-63            |
+          |-------------------------|-----------------------|
+          |00010001                 | 0 (padding)           |
+
+        * Value buffer:
+
+          |Bytes 0-3   | Bytes 4-7   | Bytes 8-11  | Bytes 12-15 | Bytes 16-19 | Bytes 20-23  | Bytes 24-63           |
+          |------------|-------------|-------------|-------------|-------------|--------------|-----------------------|
+          | 5          | unspecified | unspecified | unspecified | 4           | unspecified  | unspecified (padding) |
+
+      * u1 (float):
+        * Length: 6, Null count: 4
+        * Null bitmap buffer:
+
+          |Byte 0 (validity bitmap) | Bytes 1-63            |
+          |-------------------------|-----------------------|
+          | 00001010                | 0 (padding)           |
+
+        * Value buffer:
+
+          |Bytes 0-3    | Bytes 4-7   | Bytes 8-11  | Bytes 12-15 | Bytes 16-19 | Bytes 20-23  | Bytes 24-63           |
+          |-------------|-------------|-------------|-------------|-------------|--------------|-----------------------|
+          | unspecified | 1.2         | unspecified | 3.4         | unspecified | unspecified  | unspecified (padding) |
+
+      * u2 (`List<char>`)
+        * Length: 6, Null count: 4
+        * Null bitmap buffer:
+
+          | Byte 0 (validity bitmap) | Bytes 1-63            |
+          |--------------------------|-----------------------|
+          | 00100100                 | 0 (padding)           |
+
+        * Offsets buffer (int32)
+
+          | Bytes 0-3  | Bytes 4-7   | Bytes 8-11  | Bytes 12-15 | Bytes 16-19 | Bytes 20-23 | Bytes 24-27 | Bytes 28-63 |
+          |------------|-------------|-------------|-------------|-------------|-------------|-------------|-------------|
+          | 0          | 0           | 0           | 3           | 3           | 3           | 7           | unspecified |
+
+        * Values array (char array):
+          * Length: 7, Null count: 0
+          * Null bitmap buffer: Not required
+
+            | Bytes 0-6  | Bytes 7-63            |
+            |------------|-----------------------|
+            | joemark    | unspecified (padding) |
+
+Note that nested types in a sparse union must be internally consistent
+(e.g. see the ``List<char>`` in the diagram), i.e. random access at any index
+j on any child array will not cause an error. In other words, the array for
+the nested type must be valid if it is reinterpreted as a non-nested array.
+
+Similar to structs, a particular child array may have a non-null slot
+even if the null bitmap of the parent union array indicates the slot is
+null. Additionally, a child array may have a non-null slot even if
+the types array indicates that a slot contains a different type at the index.
+
+Dictionary encoding
+-------------------
+
+When a field is dictionary encoded, the values are represented by an array of
+signed integers representing the index of the value in the dictionary.
+The Dictionary is received as one or more DictionaryBatches with the id
+referenced by a dictionary attribute defined in the metadata (Message.fbs)
+in the Field table. The dictionary has the same layout as the type of the
+field would dictate. Each entry in the dictionary can be accessed by its
+index in the DictionaryBatches.
+When a Schema references a Dictionary id, it must send at least one
+DictionaryBatch for this id.
+
+As an example, you could have the following data: ::
+
+    type: List<Utf8>
+
+    [
+     ['a', 'b'],
+     ['a', 'b'],
+     ['a', 'b'],
+     ['c', 'd', 'e'],
+     ['c', 'd', 'e'],
+     ['c', 'd', 'e'],
+     ['c', 'd', 'e'],
+     ['a', 'b']
+    ]
+
+In dictionary-encoded form, this could appear as: ::
+
+    data List<Utf8> (dictionary-encoded, dictionary id i)
+    type: Int32
+    values:
+    [0, 0, 0, 1, 1, 1, 1, 0]
+
+    dictionary i
+    type: List<Utf8>
+    values:
+    [
+     ['a', 'b'],
+     ['c', 'd', 'e'],
+    ]
+
+References
+----------
+
+Apache Drill Documentation - `Value Vectors`_
+
+.. _least-significant bit (LSB) numbering: https://en.wikipedia.org/wiki/Bit_numbering
+.. _Intel performance guide: https://software.intel.com/en-us/articles/practical-intel-avx-optimization-on-2nd-generation-intel-core-processors
+.. _Endianness: https://en.wikipedia.org/wiki/Endianness
+.. _SIMD: https://software.intel.com/en-us/cpp-compiler-developer-guide-and-reference-introduction-to-the-simd-data-layout-templates
+.. _Parquet: https://parquet.apache.org/documentation/latest/
+.. _Value Vectors: https://drill.apache.org/docs/value-vectors/
diff --git a/docs/source/format/Metadata.rst b/docs/source/format/Metadata.rst
new file mode 100644
index 0000000000000..293d0113875a6
--- /dev/null
+++ b/docs/source/format/Metadata.rst
@@ -0,0 +1,396 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+Metadata: Logical types, schemas, data headers
+==============================================
+
+This is documentation for the Arrow metadata specification, which enables
+systems to communicate the following:
+
+* Logical array types (which are implemented using the physical memory
+  layouts specified in :doc:`Layout`)
+
+* Schemas for table-like collections of Arrow data structures
+
+* "Data headers" indicating the physical locations of memory buffers
+  sufficient to reconstruct Arrow data structures without copying memory.
+
+Canonical implementation
+------------------------
+
+We are using `Flatbuffers`_ for low-overhead reading and writing of the Arrow
+metadata. See ``Message.fbs``.
+
+Schemas
+-------
+
+The ``Schema`` type describes a table-like structure consisting of any number
+of Arrow arrays, each of which can be interpreted as a column in the table. A
+schema by itself does not describe the physical structure of any particular
+set of data.
+
+A schema consists of a sequence of **fields**, which are metadata describing
+the columns. The Flatbuffers IDL for a field is: ::
+
+    table Field {
+      // Name is not required, e.g. in a List
+      name: string;
+      nullable: bool;
+      type: Type;
+
+      // Present only if the field is dictionary encoded
+      dictionary: DictionaryEncoding;
+
+      // children apply only to Nested data types like Struct, List and Union
+      children: [Field];
+
+      // User-defined metadata
+      custom_metadata: [ KeyValue ];
+    }
+
+The ``type`` is the logical type of the field. Nested types, such as List,
+Struct, and Union, have a sequence of child fields.
+
+A JSON representation of the schema is also provided:
+
+Field: ::
+
+    {
+      "name" : "name_of_the_field",
+      "nullable" : false,
+      "type" : /* Type */,
+      "children" : [ /* Field */ ],
+    }
+
+Type: ::
+
+    {
+      "name" : "null|struct|list|union|int|floatingpoint|utf8|binary|fixedsizebinary|bool|decimal|date|time|timestamp|interval"
+      // fields as defined in the Flatbuffer depending on the type name
+    }
+
+Union: ::
+
+    {
+      "name" : "union",
+      "mode" : "Sparse|Dense",
+      "typeIds" : [ /* integer */ ]
+    }
+
+The ``typeIds`` field in the Union contains the codes used to denote each
+type, which may be different from the index of the child array. This is so
+that the union type ids do not have to be enumerated from 0.
+
+Int: ::
+
+    {
+      "name" : "int",
+      "bitWidth" : /* integer */,
+      "isSigned" : /* boolean */
+    }
+
+FloatingPoint: ::
+
+    {
+      "name" : "floatingpoint",
+      "precision" : "HALF|SINGLE|DOUBLE"
+    }
+
+Decimal: ::
+
+    {
+      "name" : "decimal",
+      "precision" : /* integer */,
+      "scale" : /* integer */
+    }
+
+Timestamp: ::
+
+    {
+      "name" : "timestamp",
+      "unit" : "SECOND|MILLISECOND|MICROSECOND|NANOSECOND"
+    }
+
+Date: ::
+
+    {
+      "name" : "date",
+      "unit" : "DAY|MILLISECOND"
+    }
+
+Time: ::
+
+    {
+      "name" : "time",
+      "unit" : "SECOND|MILLISECOND|MICROSECOND|NANOSECOND",
+      "bitWidth": /* integer: 32 or 64 */
+    }
+
+Interval: ::
+
+    {
+      "name" : "interval",
+      "unit" : "YEAR_MONTH|DAY_TIME"
+    }
+
+Schema: ::
+
+    {
+      "fields" : [
+        /* Field */
+      ]
+    }
+
+Record data headers
+-------------------
+
+A record batch is a collection of top-level named, equal length Arrow arrays
+(or vectors). If one of the arrays contains nested data, its child arrays are
+not required to be the same length as the top-level arrays.
+
+A record batch can be thought of as a realization of a particular schema. The
+metadata describing a particular record batch is called a "data header". Here
+is the Flatbuffers IDL for a record batch data header: ::
+
+    table RecordBatch {
+      length: long;
+      nodes: [FieldNode];
+      buffers: [Buffer];
+    }
+
+The ``RecordBatch`` metadata provides for record batches with length exceeding
+2 :sup:`31` - 1, but Arrow implementations are not required to implement
+support beyond this size.
+
+The ``nodes`` and ``buffers`` fields are produced by a depth-first traversal /
+flattening of a schema (possibly containing nested types) for a given
+in-memory data set.
+
+Buffers
+~~~~~~~
+
+A buffer is metadata describing a contiguous memory region relative to some
+virtual address space. This may include:
+
+* Shared memory, e.g. a memory-mapped file
+* An RPC message received in-memory
+* Data in a file
+
+The key form of the Buffer type is: ::
+
+    struct Buffer {
+      offset: long;
+      length: long;
+    }
+
+In the context of a record batch, each field has some number of buffers
+associated with it, which are derived from their physical memory layout.
+
+Each logical type (separate from its children, if it is a nested type) has a
+deterministic number of buffers associated with it. These will be specified
+in the logical types section.
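+
+This per-type buffer layout can be observed from the Python bindings, which
+expose the physical buffers of an array (a sketch with arbitrary data)::
+
+    import pyarrow as pa
+
+    arr = pa.array([1, None, 2], type=pa.int32())
+    # An Int32 array carries two buffers: validity bitmap and values
+    validity, values = arr.buffers()
+
+    strings = pa.array(['a', None, 'bc'], type=pa.string())
+    # A Utf8 array carries three: validity bitmap, offsets, and data
+    validity, offsets, data = strings.buffers()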
+
+Field metadata
+~~~~~~~~~~~~~~
+
+The ``FieldNode`` values contain metadata about each level in a nested type
+hierarchy. ::
+
+    struct FieldNode {
+      /// The number of value slots in the Arrow array at this level of a
+      /// nested tree
+      length: long;
+
+      /// The number of observed nulls.
+      null_count: long;
+    }
+
+The ``FieldNode`` metadata provides for fields with length exceeding
+2 :sup:`31` - 1, but Arrow implementations are not required to implement
+support for large arrays.
+
+Flattening of nested data
+-------------------------
+
+Nested types are flattened in the record batch in depth-first order. When
+visiting each field in the nested type tree, the metadata is appended to the
+top-level ``fields`` array and the buffers associated with that field (but not
+its children) are appended to the ``buffers`` array.
+
+For example, let's consider the schema ::
+
+    col1: Struct<a: Int32, b: List<Int64>, c: Float64>
+    col2: Utf8
+
+The flattened version of this is: ::
+
+    FieldNode 0: Struct name='col1'
+    FieldNode 1: Int32 name='a'
+    FieldNode 2: List name='b'
+    FieldNode 3: Int64 name='item'  # arbitrary
+    FieldNode 4: Float64 name='c'
+    FieldNode 5: Utf8 name='col2'
+
+For the buffers produced, we would have the following (as described in more
+detail for each type below): ::
+
+    buffer 0: field 0 validity bitmap
+
+    buffer 1: field 1 validity bitmap
+    buffer 2: field 1 values
+
+    buffer 3: field 2 validity bitmap
+    buffer 4: field 2 list offsets
+
+    buffer 5: field 3 validity bitmap
+    buffer 6: field 3 values
+
+    buffer 7: field 4 validity bitmap
+    buffer 8: field 4 values
+
+    buffer 9: field 5 validity bitmap
+    buffer 10: field 5 offsets
+    buffer 11: field 5 data
+
+.. _spec-logical-types:
+
+Logical types
+-------------
+
+A logical type consists of a type name and metadata along with an explicit
+mapping to a physical memory representation. These may fall into some
+different categories:
+
+* Types represented as fixed-width primitive arrays (for example: C-style
+  integers and floating point numbers)
+* Types having equivalent memory layout to a physical nested type (e.g.
+  strings use the list representation, but logically are not nested types)
+
+Integers
+~~~~~~~~
+
+In the first version of Arrow we provide the standard 8-bit through 64-bit
+C integer types, both signed and unsigned:
+
+* Signed types: Int8, Int16, Int32, Int64
+* Unsigned types: UInt8, UInt16, UInt32, UInt64
+
+The IDL looks like: ::
+
+    table Int {
+      bitWidth: int;
+      is_signed: bool;
+    }
+
+The integer endianness is currently set globally at the schema level. If a
+schema is set to be little-endian, then all integer types occurring within
+must be little-endian. Integers that are part of other data representations,
+such as list offsets and union types, must have the same endianness as the
+entire record batch.
+
+Floating point numbers
+~~~~~~~~~~~~~~~~~~~~~~
+
+We provide 3 types of floating point numbers as fixed bit-width primitive
+arrays:
+
+- Half precision, 16-bit width
+- Single precision, 32-bit width
+- Double precision, 64-bit width
+
+The IDL looks like: ::
+
+    enum Precision:int {HALF, SINGLE, DOUBLE}
+
+    table FloatingPoint {
+      precision: Precision;
+    }
+
+Boolean
+~~~~~~~
+
+The Boolean logical type is represented as a 1-bit wide primitive physical
+type. The bits are numbered using least-significant bit (LSB) ordering.
+
+Like other fixed bit-width primitive types, boolean data appears as 2 buffers
+in the data header (one bitmap for the validity vector and one for the
+values).
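+
+A quick way to confirm this from the Python bindings (a sketch; note that
+both buffers of a boolean array are bit-packed)::
+
+    import pyarrow as pa
+
+    arr = pa.array([True, None, False])
+    validity, values = arr.buffers()  # two buffers, as described above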
+
+List
+~~~~
+
+The ``List`` logical type is the logical (and identically-named) counterpart
+to the List physical type.
+
+In data header form, the list field node contains 2 buffers:
+
+* Validity bitmap
+* List offsets
+
+The buffers associated with a list's child field are handled recursively
+according to the child logical type (e.g. ``List<Int32>`` vs.
+``List<Utf8>``).
+
+Utf8 and Binary
+~~~~~~~~~~~~~~~
+
+We specify two logical types for variable length bytes:
+
+* ``Utf8`` data is Unicode values with UTF-8 encoding
+* ``Binary`` is any other variable length bytes
+
+These types both have the same memory layout as the nested type
+``List<UInt8>``, with the constraint that the inner bytes can contain no null
+values. From a logical type perspective they are primitive, not nested types.
+
+In data header form, while ``List<UInt8>`` would appear as 2 field nodes
+(``List`` and ``UInt8``) and 4 buffers (2 for each of the nodes, as per
+above), these types have a simplified representation: a single field node (of
+``Utf8`` or ``Binary`` logical type, which have no children) and 3 buffers:
+
+* Validity bitmap
+* List offsets
+* Byte data
+
+Decimal
+~~~~~~~
+
+Decimals are represented as a 2's complement 128-bit (16 byte) signed integer
+in little-endian byte order.
+
+Timestamp
+~~~~~~~~~
+
+All timestamps are stored as a 64-bit integer, with one of four unit
+resolutions: second, millisecond, microsecond, and nanosecond.
+
+Date
+~~~~
+
+We support two different date types:
+
+* Days since the UNIX epoch as a 32-bit integer
+* Milliseconds since the UNIX epoch as a 64-bit integer
+
+Time
+~~~~
+
+Time supports the same unit resolutions: second, millisecond, microsecond,
+and nanosecond. We represent time as the smallest integer accommodating the
+indicated unit. For second and millisecond: 32-bit, for the others 64-bit.
+
+Dictionary encoding
+-------------------
+
+.. _Flatbuffers: http://github.com/google/flatbuffers
diff --git a/docs/source/format/README.rst b/docs/source/format/README.rst
new file mode 100644
index 0000000000000..f2f770bdc95c1
--- /dev/null
+++ b/docs/source/format/README.rst
@@ -0,0 +1,53 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+Arrow specification documents
+=============================
+
+Currently, the Arrow specification consists of these pieces:
+
+- Metadata specification (see :doc:`Metadata`)
+- Physical memory layout specification (see :doc:`Layout`)
+- Logical Types, Schemas, and Record Batch Metadata (see Schema.fbs)
+- Encapsulated Messages (see Message.fbs)
+- Mechanics of messaging between Arrow systems (IPC, RPC, etc.)
(see :doc:`IPC`) +- Tensor (Multi-dimensional array) Metadata (see Tensor.fbs) + +The metadata currently uses Google's `flatbuffers library`_ for serializing a +couple related pieces of information: + +- Schemas for tables or record (row) batches. This contains the logical types, + field names, and other metadata. Schemas do not contain any information about + actual data. +- *Data headers* for record (row) batches. These must correspond to a known + schema, and enable a system to send and receive Arrow row batches in a form + that can be precisely disassembled or reconstructed. + +Arrow Format Maturity and Stability +----------------------------------- + +We have made significant progress hardening the Arrow in-memory format and +Flatbuffer metadata since the project started in February 2016. We have +integration tests which verify binary compatibility between the Java and C++ +implementations, for example. + +Major versions may still include breaking changes to the memory format or +metadata, so it is recommended to use the same released version of all +libraries in your applications for maximum compatibility. Data stored in the +Arrow IPC formats should not be used for long term storage. + +.. _flatbuffers library: http://github.com/google/flatbuffers diff --git a/python/doc/source/index.rst b/docs/source/index.rst similarity index 62% rename from python/doc/source/index.rst rename to docs/source/index.rst index 712b105a5bfbb..2b367b33823a2 100644 --- a/python/doc/source/index.rst +++ b/docs/source/index.rst @@ -15,8 +15,8 @@ .. specific language governing permissions and limitations .. under the License. -Python bindings for Apache Arrow -================================ +Apache Arrow +============ Apache Arrow is a cross-language development platform for in-memory data. It specifies a standardized language-independent columnar memory format for flat @@ -24,31 +24,25 @@ and hierarchical data, organized for efficient analytic operations on modern hardware. It also provides computational libraries and zero-copy streaming messaging and interprocess communication. -The Arrow Python bindings have first-class integration with NumPy, pandas, and -built-in Python objects. +.. toctree:: + :maxdepth: 1 + :caption: Memory Format + + format/README + format/Guidelines + format/Layout + format/Metadata + format/IPC + +.. toctree:: + :maxdepth: 2 + :caption: Languages -This is the documentation of the Python API of Apache Arrow. For more details -on the format and other language bindings see -`the main page for Arrow `_. Here will we only -detail the usage of the Python API for Arrow and the leaf libraries that add -additional functionality such as reading Apache Parquet files into Arrow -structures. + cpp/index + python/index .. toctree:: :maxdepth: 2 - :caption: Getting Started - - install - development - memory - data - ipc - filesystems - plasma - numpy - pandas - csv - parquet - extending - api - getting_involved + :caption: Other Topics + + building diff --git a/docs/source/python/api.rst b/docs/source/python/api.rst new file mode 100644 index 0000000000000..b06509f7a5b19 --- /dev/null +++ b/docs/source/python/api.rst @@ -0,0 +1,36 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. 
"License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. _api: + +************* +API Reference +************* + +.. toctree:: + :maxdepth: 2 + + api/datatypes + api/arrays + api/memory + api/files + api/tables + api/ipc + api/formats + api/plasma + api/cuda + api/misc diff --git a/docs/source/python/api/arrays.rst b/docs/source/python/api/arrays.rst new file mode 100644 index 0000000000000..db45eeff0ca5a --- /dev/null +++ b/docs/source/python/api/arrays.rst @@ -0,0 +1,109 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. _api.array: +.. currentmodule:: pyarrow + +Arrays and Scalars +================== + +Factory Function +---------------- + +This function is the main entry point to create an Arrow array from Python. + +.. autosummary:: + :toctree: ../generated/ + + array + +Array Types +----------- + +An array's Python class depends on its data type. Concrete array classes +may expose data type-specific methods or properties. + +.. autosummary:: + :toctree: ../generated/ + + Array + BooleanArray + FloatingPointArray + IntegerArray + Int8Array + Int16Array + Int32Array + Int64Array + NullArray + NumericArray + UInt8Array + UInt16Array + UInt32Array + UInt64Array + BinaryArray + StringArray + FixedSizeBinaryArray + Time32Array + Time64Array + Date32Array + Date64Array + TimestampArray + Decimal128Array + DictionaryArray + ListArray + StructArray + UnionArray + +.. _api.scalar: + +Array Scalars +------------- + +Indexing an array wraps the represented value in a scalar object whose +concrete type depends on the array data type. You shouldn't instantiate +any of those classes directly. + +.. autosummary:: + :toctree: ../generated/ + + NA + Scalar + ArrayValue + BooleanValue + Int8Value + Int16Value + Int32Value + Int64Value + UInt8Value + UInt16Value + UInt32Value + UInt64Value + FloatValue + DoubleValue + BinaryValue + StringValue + FixedSizeBinaryValue + Time32Value + Time64Value + Date32Value + Date64Value + TimestampValue + DecimalValue + DictionaryValue + ListValue + StructValue + UnionValue diff --git a/docs/source/python/api/cuda.rst b/docs/source/python/api/cuda.rst new file mode 100644 index 0000000000000..364f032403586 --- /dev/null +++ b/docs/source/python/api/cuda.rst @@ -0,0 +1,62 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. 
or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow.cuda + +CUDA Integration +================ + +.. ifconfig:: not cuda_enabled + + .. error:: + This documentation was built without CUDA enabled. The CUDA + API docs are not available. + +.. NOTE We still generate those API docs (with empty docstrings) +.. when CUDA is disabled and `pyarrow.cuda` mocked (see conf.py). +.. Otherwise we'd get autodoc warnings, see https://github.com/sphinx-doc/sphinx/issues/4770 + +CUDA Contexts +------------- + +.. autosummary:: + :toctree: ../generated/ + + Context + +CUDA Buffers +------------ + +.. autosummary:: + :toctree: ../generated/ + + CudaBuffer + new_host_buffer + HostBuffer + BufferReader + BufferWriter + +Serialization and IPC +--------------------- + +.. autosummary:: + :toctree: ../generated/ + + serialize_record_batch + read_record_batch + read_message + IpcMemHandle diff --git a/docs/source/python/api/datatypes.rst b/docs/source/python/api/datatypes.rst new file mode 100644 index 0000000000000..5ad0204966337 --- /dev/null +++ b/docs/source/python/api/datatypes.rst @@ -0,0 +1,134 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. _api.types: +.. currentmodule:: pyarrow + +Data Types and Schemas +====================== + +Factory Functions +----------------- + +These should be used to create Arrow data types and schemas. + +.. autosummary:: + :toctree: ../generated/ + + null + bool_ + int8 + int16 + int32 + int64 + uint8 + uint16 + uint32 + uint64 + float16 + float32 + float64 + time32 + time64 + timestamp + date32 + date64 + binary + string + utf8 + decimal128 + list_ + struct + dictionary + field + schema + from_numpy_dtype + +.. _api.type_classes: +.. currentmodule:: pyarrow + +Type Classes +------------ + +Do not instantiate these classes directly. Instead, call one of the factory +functions above. + +.. autosummary:: + :toctree: ../generated/ + + DataType + DictionaryType + ListType + StructType + UnionType + TimestampType + Time32Type + Time64Type + FixedSizeBinaryType + Decimal128Type + Field + Schema + +.. _api.types.checking: +.. 
currentmodule:: pyarrow.types
+
+Type Checking
+-------------
+
+These functions are predicates to check whether a :class:`DataType` instance
+represents a given data type (such as ``int32``) or general category
+(such as "is a signed integer").
+
+.. autosummary::
+   :toctree: ../generated/
+
+   is_boolean
+   is_integer
+   is_signed_integer
+   is_unsigned_integer
+   is_int8
+   is_int16
+   is_int32
+   is_int64
+   is_uint8
+   is_uint16
+   is_uint32
+   is_uint64
+   is_floating
+   is_float16
+   is_float32
+   is_float64
+   is_decimal
+   is_list
+   is_struct
+   is_union
+   is_nested
+   is_temporal
+   is_timestamp
+   is_date
+   is_date32
+   is_date64
+   is_time
+   is_time32
+   is_time64
+   is_null
+   is_binary
+   is_unicode
+   is_string
+   is_fixed_size_binary
+   is_map
+   is_dictionary
diff --git a/docs/source/python/api/files.rst b/docs/source/python/api/files.rst
new file mode 100644
index 0000000000000..106dfde8abffb
--- /dev/null
+++ b/docs/source/python/api/files.rst
@@ -0,0 +1,65 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. currentmodule:: pyarrow
+
+Streams and File Access
+=======================
+
+.. _api.io:
+
+Factory Functions
+-----------------
+
+These factory functions are the recommended way to create an Arrow stream.
+They accept various kinds of sources, such as in-memory buffers or on-disk
+files.
+
+.. autosummary::
+   :toctree: ../generated/
+
+   input_stream
+   output_stream
+   memory_map
+   create_memory_map
+
+Stream Classes
+--------------
+
+.. autosummary::
+   :toctree: ../generated/
+
+   NativeFile
+   OSFile
+   PythonFile
+   BufferReader
+   BufferOutputStream
+   FixedSizeBufferWriter
+   MemoryMappedFile
+   CompressedInputStream
+   CompressedOutputStream
+
+File Systems
+------------
+
+.. autosummary::
+   :toctree: ../generated/
+
+   hdfs.connect
+   LocalFileSystem
+
+.. class:: HadoopFileSystem
+   :noindex:
diff --git a/docs/source/python/api/formats.rst b/docs/source/python/api/formats.rst
new file mode 100644
index 0000000000000..8de30ece93584
--- /dev/null
+++ b/docs/source/python/api/formats.rst
@@ -0,0 +1,70 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+..
specific language governing permissions and limitations +.. under the License. + +Tabular File Formats +==================== + +.. currentmodule:: pyarrow.csv + +.. _api.csv: + +CSV Files +--------- + +.. autosummary:: + :toctree: ../generated/ + + ReadOptions + ParseOptions + ConvertOptions + read_csv + +.. _api.feather: + +Feather Files +------------- + +.. currentmodule:: pyarrow.feather + +.. autosummary:: + :toctree: ../generated/ + + read_feather + write_feather + +.. currentmodule:: pyarrow + +.. _api.parquet: + +Parquet Files +------------- + +.. currentmodule:: pyarrow.parquet + +.. autosummary:: + :toctree: ../generated/ + + ParquetDataset + ParquetFile + ParquetWriter + read_table + read_metadata + read_pandas + read_schema + write_metadata + write_table + write_to_dataset diff --git a/docs/source/python/api/ipc.rst b/docs/source/python/api/ipc.rst new file mode 100644 index 0000000000000..bd14d30dcb274 --- /dev/null +++ b/docs/source/python/api/ipc.rst @@ -0,0 +1,59 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow + +.. _api.ipc: + +Serialization and IPC +===================== + +Inter-Process Communication +--------------------------- + +.. autosummary:: + :toctree: ../generated/ + + ipc.open_file + ipc.open_stream + Message + MessageReader + RecordBatchFileReader + RecordBatchFileWriter + RecordBatchStreamReader + RecordBatchStreamWriter + read_message + read_record_batch + get_record_batch_size + read_tensor + write_tensor + get_tensor_size + +Serialization +------------- + +.. autosummary:: + :toctree: ../generated/ + + serialize + serialize_to + deserialize + deserialize_components + deserialize_from + read_serialized + SerializedPyObject + SerializationContext diff --git a/docs/source/python/api/memory.rst b/docs/source/python/api/memory.rst new file mode 100644 index 0000000000000..da9156fcad539 --- /dev/null +++ b/docs/source/python/api/memory.rst @@ -0,0 +1,68 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow + +.. 
_api.memory: + +Buffers and Memory +================== + +In-Memory Buffers +----------------- + +Factory Functions +~~~~~~~~~~~~~~~~~ + +.. autosummary:: + :toctree: ../generated/ + + allocate_buffer + py_buffer + foreign_buffer + +Classes +~~~~~~~ + +.. autosummary:: + :toctree: ../generated/ + + Buffer + ResizableBuffer + +Miscellaneous +~~~~~~~~~~~~~ + +.. autosummary:: + :toctree: ../generated/ + + compress + decompress + +.. _api.memory_pool: + +Memory Pools +------------ + +.. autosummary:: + :toctree: ../generated/ + + MemoryPool + default_memory_pool + total_allocated_bytes + set_memory_pool + log_memory_allocations diff --git a/docs/source/python/api/misc.rst b/docs/source/python/api/misc.rst new file mode 100644 index 0000000000000..c13b80620f154 --- /dev/null +++ b/docs/source/python/api/misc.rst @@ -0,0 +1,40 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow + +Miscellaneous +============= + +Multi-Threading +--------------- + +.. autosummary:: + :toctree: ../generated/ + + cpu_count + set_cpu_count + +Using with C extensions +----------------------- + +.. autosummary:: + :toctree: ../generated/ + + get_include + get_libraries + get_library_dirs diff --git a/docs/source/python/api/plasma.rst b/docs/source/python/api/plasma.rst new file mode 100644 index 0000000000000..8df9e4e21ac8b --- /dev/null +++ b/docs/source/python/api/plasma.rst @@ -0,0 +1,33 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow.plasma + +.. _api.plasma: + +Plasma In-Memory Object Store +============================= + +Classes +------- + +.. autosummary:: + :toctree: ../generated/ + + ObjectID + PlasmaClient + PlasmaBuffer diff --git a/docs/source/python/api/tables.rst b/docs/source/python/api/tables.rst new file mode 100644 index 0000000000000..5a229d29fa60b --- /dev/null +++ b/docs/source/python/api/tables.rst @@ -0,0 +1,54 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. 
distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. currentmodule:: pyarrow
+
+.. _api.table:
+
+Tables and Tensors
+==================
+
+Factory Functions
+-----------------
+
+.. autosummary::
+   :toctree: ../generated/
+
+   column
+   chunked_array
+   concat_tables
+
+Classes
+-------
+
+.. autosummary::
+   :toctree: ../generated/
+
+   ChunkedArray
+   Column
+   RecordBatch
+   Table
+
+.. _api.tensor:
+
+Tensors
+-------
+
+.. autosummary::
+   :toctree: ../generated/
+
+   Tensor
diff --git a/docs/source/python/benchmarks.rst b/docs/source/python/benchmarks.rst
new file mode 100644
index 0000000000000..12205c57355bb
--- /dev/null
+++ b/docs/source/python/benchmarks.rst
@@ -0,0 +1,55 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+Benchmarks
+==========
+
+The ``pyarrow`` package comes with a suite of benchmarks meant to
+run with `ASV`_. You'll need to install the ``asv`` package first
+(``pip install asv`` or ``conda install -c conda-forge asv``).
+
+Running the benchmarks
+----------------------
+
+To run the benchmarks for a locally-built Arrow, run ``asv dev`` or
+``asv run --python=same``.
+
+Running for arbitrary Git revisions
+-----------------------------------
+
+ASV allows you to store results and generate graphs of the benchmarks over
+the project's evolution. You need to have the latest development version of
+ASV:
+
+.. code::
+
+    pip install git+https://github.com/airspeed-velocity/asv
+
+The build scripts assume that Conda's ``activate`` script is on the PATH
+(the ``conda activate`` command unfortunately isn't available from
+non-interactive scripts).
+
+Now you should be ready to run ``asv run`` or whatever other command
+suits your needs. Note that this can be quite long, as Arrow needs to be
+rebuilt for each Git revision you're running the benchmarks for.
+
+Compatibility
+-------------
+
+We only expect the benchmarking setup to work with Python 3.6 or later,
+on a Unix-like system with bash.
+
+.. _ASV: https://asv.readthedocs.org/
_asv: https://asv.readthedocs.org/ diff --git a/python/doc/source/csv.rst b/docs/source/python/csv.rst similarity index 95% rename from python/doc/source/csv.rst rename to docs/source/python/csv.rst index f1bcea9e24795..17023b1610d48 100644 --- a/python/doc/source/csv.rst +++ b/docs/source/python/csv.rst @@ -86,3 +86,7 @@ overhead of reading CSV files. Performance options can be controlled through the :class:`ReadOptions` class. Multi-threaded reading is the default for highest performance, distributing the workload efficiently over all available cores. +
+.. note::
+   The number of threads to use concurrently is automatically inferred by Arrow
+   and can be inspected using the :func:`~pyarrow.cpu_count` function. diff --git a/docs/source/python/cuda.rst b/docs/source/python/cuda.rst new file mode 100644 index 0000000000000..b0150c1c5c8a2 --- /dev/null +++ b/docs/source/python/cuda.rst @@ -0,0 +1,159 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. +
+.. currentmodule:: pyarrow.cuda
+
+CUDA Integration
+================
+
+Arrow is not limited to CPU buffers (located in the computer's main memory,
+also named "host memory"). It also has provisions for accessing buffers
+located on a CUDA-capable GPU device (in "device memory").
+
+.. note::
+   This functionality is optional and must have been enabled at build time.
+   If this is not done by your package manager, you might have to build Arrow
+   yourself.
+
+CUDA Contexts
+-------------
+
+A CUDA context represents access to a particular CUDA-capable device.
+For example, this creates a CUDA context accessing CUDA device number 0::
+
+   >>> from pyarrow import cuda
+   >>> ctx = cuda.Context(0)
+   >>>
+
+CUDA Buffers
+------------
+
+A CUDA buffer can be created by copying data from host memory to the memory
+of a CUDA device, using the :meth:`Context.buffer_from_data` method.
+The source data can be any Python buffer-like object, including Arrow buffers::
+
+   >>> import numpy as np
+   >>> arr = np.arange(4, dtype=np.int32)
+   >>> arr.nbytes
+   16
+   >>> cuda_buf = ctx.buffer_from_data(arr)
+   >>> type(cuda_buf)
+   pyarrow._cuda.CudaBuffer
+   >>> cuda_buf.size  # The buffer's size in bytes
+   16
+   >>> cuda_buf.address  # The buffer's address in device memory
+   30088364544
+   >>> cuda_buf.context.device_number
+   0
+
+Conversely, you can copy a CUDA buffer back to host memory, getting a regular
+CPU buffer::
+
+   >>> buf = cuda_buf.copy_to_host()
+   >>> type(buf)
+   pyarrow.lib.Buffer
+   >>> np.frombuffer(buf, dtype=np.int32)
+   array([0, 1, 2, 3], dtype=int32)
+
+.. warning::
+   Many Arrow functions expect a CPU buffer but will not check the buffer's
+   actual type.
You will get a crash if you pass a CUDA buffer to such a
+   function::
+
+      >>> pa.py_buffer(b"x" * 16).equals(cuda_buf)
+      Segmentation fault
+
+Numba Integration
+-----------------
+
+There is not much you can do directly with Arrow CUDA buffers from Python,
+but they support interoperation with `Numba <https://numba.pydata.org/>`_,
+a JIT compiler which can turn Python code into optimized CUDA kernels.
+
+Arrow to Numba
+~~~~~~~~~~~~~~
+
+First let's define a Numba CUDA kernel operating on an ``int32`` array. Here,
+we will simply increment each array element (assuming the array is writable)::
+
+   import numba.cuda
+
+   @numba.cuda.jit
+   def increment_by_one(an_array):
+       pos = numba.cuda.grid(1)
+       if pos < an_array.size:
+           an_array[pos] += 1
+
+Then we need to wrap our CUDA buffer into a Numba "device array" with the right
+array metadata (shape, strides and datatype). This is necessary so that Numba
+can identify the array's characteristics and compile the kernel with the
+appropriate type declarations.
+
+In this case the metadata can simply be obtained from the original Numpy array.
+Note the GPU data isn't copied, just pointed to::
+
+   >>> from numba.cuda.cudadrv.devicearray import DeviceNDArray
+   >>> device_arr = DeviceNDArray(arr.shape, arr.strides, arr.dtype, gpu_data=cuda_buf.to_numba())
+
+(ideally we could have defined an Arrow array in CPU memory, copied it to CUDA
+memory without losing type information, and then invoked the Numba kernel on it
+without constructing the DeviceNDArray by hand; this is not yet possible)
+
+Finally we can run the Numba CUDA kernel on the Numba device array (here
+with a 16x16 grid size)::
+
+   >>> increment_by_one[16, 16](device_arr)
+
+And the results can be checked by copying back the CUDA buffer to CPU memory::
+
+   >>> np.frombuffer(cuda_buf.copy_to_host(), dtype=np.int32)
+   array([1, 2, 3, 4], dtype=int32)
+
+Numba to Arrow
+~~~~~~~~~~~~~~
+
+Conversely, a Numba-created device array can be viewed as an Arrow CUDA buffer,
+using the :meth:`CudaBuffer.from_numba` factory method.
+
+For the sake of example, let's first create a Numba device array::
+
+   >>> arr = np.arange(10, 14, dtype=np.int32)
+   >>> arr
+   array([10, 11, 12, 13], dtype=int32)
+   >>> device_arr = numba.cuda.to_device(arr)
+
+Then we can create a CUDA buffer pointing to the device array's memory.
+We don't need to pass a CUDA context explicitly this time: the appropriate
+CUDA context is automatically retrieved and adapted from the Numba object.
+
+::
+
+   >>> cuda_buf = cuda.CudaBuffer.from_numba(device_arr.gpu_data)
+   >>> cuda_buf.size
+   16
+   >>> cuda_buf.address
+   30088364032
+   >>> cuda_buf.context.device_number
+   0
+
+Of course, we can copy the CUDA buffer back to host memory::
+
+   >>> np.frombuffer(cuda_buf.copy_to_host(), dtype=np.int32)
+   array([10, 11, 12, 13], dtype=int32)
+
+.. seealso::
+   Documentation for Numba's `CUDA support <https://numba.pydata.org/numba-doc/latest/cuda/index.html>`_. diff --git a/python/doc/source/data.rst b/docs/source/python/data.rst similarity index 100% rename from python/doc/source/data.rst rename to docs/source/python/data.rst diff --git a/python/doc/source/development.rst b/docs/source/python/development.rst similarity index 85% rename from python/doc/source/development.rst rename to docs/source/python/development.rst index 3bd66893aff3d..d85537110e48c 100644 --- a/python/doc/source/development.rst +++ b/docs/source/python/development.rst @@ -76,28 +76,41 @@ Using Conda Let's create a conda environment with all the C++ build and Python dependencies from conda-forge: +On Linux and OSX: +
+..
code-block:: shell
-   conda create -y -q -n pyarrow-dev \
-         python=3.6 numpy six setuptools cython pandas pytest \
-         cmake flatbuffers rapidjson boost-cpp thrift-cpp snappy zlib \
-         gflags brotli jemalloc lz4-c zstd -c conda-forge
+   conda create -y -n pyarrow-dev -c conda-forge \
+         --file arrow/ci/conda_env_unix.yml \
+         --file arrow/ci/conda_env_cpp.yml \
+         --file arrow/ci/conda_env_python.yml \
+         python=3.6
+   conda activate pyarrow-dev
+For Windows, see the `Developing on Windows`_ section below.
+
We need to set some environment variables to let Arrow's build system know
about our build toolchain:

.. code-block:: shell

   export ARROW_BUILD_TYPE=release
-   export ARROW_BUILD_TOOLCHAIN=$CONDA_PREFIX
   export ARROW_HOME=$CONDA_PREFIX
   export PARQUET_HOME=$CONDA_PREFIX
+   export BOOST_HOME=$CONDA_PREFIX

Using pip
~~~~~~~~~

+.. warning::
+
+   If you installed Python using the Anaconda distribution or `Miniconda
+   <https://conda.io/miniconda.html>`_, you cannot currently use ``virtualenv``
+   to manage your development. Please follow the conda-based development
+   instructions instead.
+
On macOS, install all dependencies through Homebrew that are required for
building Arrow C++:

@@ -114,9 +127,13 @@ dependencies will be automatically built by Arrow's third-party toolchain.
         libboost-filesystem-dev \
         libboost-system-dev \
         libboost-regex-dev \
+        python-dev \
+        autoconf \
         flex \
         bison

+If you are building Arrow for Python 3, install ``python3-dev`` instead of ``python-dev``.
+
On Arch Linux, you can get these dependencies via pacman.

.. code-block:: shell

@@ -174,6 +191,12 @@ Now build and install the Arrow C++ libraries:

If you don't want to build and install the Plasma in-memory object store,
you can omit the ``-DARROW_PLASMA=on`` flag.
+Also, if multiple versions of Python are installed in your environment,
+you may have to pass additional parameters to cmake so that
+it can find the right executable, headers and libraries.
+For example, specifying ``-DPYTHON_EXECUTABLE=$VIRTUAL_ENV/bin/python``
+(assuming that you're in a virtualenv) lets cmake choose
+the Python executable you are using.

.. note::

Now, build pyarrow:

.. code-block:: shell

-   cd arrow/python
+   pushd arrow/python
    python setup.py build_ext --build-type=$ARROW_BUILD_TYPE \
           --with-parquet --with-plasma --inplace
+   popd

If you did not build with plasma, you can omit ``--with-plasma``.

@@ -216,6 +240,7 @@ libraries), one can set ``--bundle-arrow-cpp``:

.. code-block:: shell

+   pip install wheel  # if not installed
    python setup.py build_ext --build-type=$ARROW_BUILD_TYPE \
           --with-parquet --with-plasma --bundle-arrow-cpp bdist_wheel

@@ -276,11 +301,11 @@ First, starting from fresh clones of Apache Arrow:

.. code-block:: shell

-   conda create -y -q -n pyarrow-dev ^
-         python=3.6 numpy six setuptools cython pandas pytest ^
-         cmake flatbuffers rapidjson boost-cpp thrift-cpp snappy zlib ^
-         gflags brotli lz4-c zstd -c conda-forge
-   activate pyarrow-dev
+   conda create -y -n pyarrow-dev -c conda-forge ^
+         --file arrow\ci\conda_env_cpp.yml ^
+         --file arrow\ci\conda_env_python.yml ^
+         python=3.7
+   conda activate pyarrow-dev

Now, we build and install Arrow C++ libraries

@@ -330,3 +355,8 @@ Getting ``python-test.exe`` to run is a bit tricky because your

   set PYTHONHOME=%CONDA_PREFIX%

Now ``python-test.exe`` or simply ``ctest`` (to run all tests) should work.
+
+Building the Documentation
+==========================
+
+See :ref:`building-docs` for instructions to build the HTML documentation.
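A quick way to confirm that a fresh development build works is a small smoke test; this is an illustrative snippet, not part of the official test suite:

.. code-block:: python

   import pyarrow as pa

   # A locally-built pyarrow should import and report its version.
   print(pa.__version__)

   # Exercise a trivial allocation through the Arrow C++ libraries.
   arr = pa.array([1, 2, None, 4])
   assert arr.null_count == 1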
diff --git a/python/doc/source/extending.rst b/docs/source/python/extending.rst similarity index 100% rename from python/doc/source/extending.rst rename to docs/source/python/extending.rst diff --git a/python/doc/source/filesystems.rst b/docs/source/python/filesystems.rst similarity index 100% rename from python/doc/source/filesystems.rst rename to docs/source/python/filesystems.rst diff --git a/python/doc/source/getting_involved.rst b/docs/source/python/getting_involved.rst similarity index 100% rename from python/doc/source/getting_involved.rst rename to docs/source/python/getting_involved.rst diff --git a/docs/source/python/index.rst b/docs/source/python/index.rst new file mode 100644 index 0000000000000..9f96771494c79 --- /dev/null +++ b/docs/source/python/index.rst @@ -0,0 +1,51 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. +
+Python bindings
+===============
+
+This is the documentation of the Python API of Apache Arrow. For more details
+on the Arrow format and other language bindings see the
+:doc:`parent documentation <../index>`.
+
+The Arrow Python bindings (also named "PyArrow") have first-class integration
+with NumPy, pandas, and built-in Python objects. They are based on the C++
+implementation of Arrow.
+
+Here we will detail the usage of the Python API for Arrow and the leaf
+libraries that add additional functionality such as reading Apache Parquet
+files into Arrow structures.
+
+.. toctree::
+   :maxdepth: 2
+
+   install
+   memory
+   data
+   ipc
+   filesystems
+   plasma
+   numpy
+   pandas
+   csv
+   parquet
+   cuda
+   extending
+   api
+   development
+   getting_involved
+   benchmarks diff --git a/python/doc/source/install.rst b/docs/source/python/install.rst similarity index 97% rename from python/doc/source/install.rst rename to docs/source/python/install.rst index d07d9004d2632..8092b6ce6a0ef 100644 --- a/python/doc/source/install.rst +++ b/docs/source/python/install.rst @@ -15,8 +15,8 @@ .. specific language governing permissions and limitations .. under the License. -Install PyArrow -=============== +Installing PyArrow +================== Conda ----- diff --git a/python/doc/source/ipc.rst b/docs/source/python/ipc.rst similarity index 97% rename from python/doc/source/ipc.rst rename to docs/source/python/ipc.rst index 3f7e787cd0c2f..812d843b0df56 100644 --- a/python/doc/source/ipc.rst +++ b/docs/source/python/ipc.rst @@ -84,11 +84,11 @@ particular stream. Now we can do: Now ``buf`` contains the complete stream as an in-memory byte buffer. We can read such a stream with :class:`~pyarrow.RecordBatchStreamReader` or the -convenience function ``pyarrow.open_stream``: +convenience function ``pyarrow.ipc.open_stream``: ..
ipython:: python

-   reader = pa.open_stream(buf)
+   reader = pa.ipc.open_stream(buf)
    reader.schema

    batches = [b for b in reader]

@@ -125,11 +125,11 @@ The :class:`~pyarrow.RecordBatchFileWriter` has the same API as
The difference between :class:`~pyarrow.RecordBatchFileReader` and
:class:`~pyarrow.RecordBatchStreamReader` is that the input source must have a
``seek`` method for random access. The stream reader only requires read
-operations. We can also use the ``pyarrow.open_file`` method to open a file:
+operations. We can also use the ``pyarrow.ipc.open_file`` method to open a file:

.. ipython:: python

-   reader = pa.open_file(buf)
+   reader = pa.ipc.open_file(buf)

Because we have access to the entire payload, we know the number of record
batches in the file, and can read any at random:

@@ -149,7 +149,7 @@ DataFrame output:

.. ipython:: python

-   df = pa.open_file(buf).read_pandas()
+   df = pa.ipc.open_file(buf).read_pandas()
    df[:5]

Arbitrary Object Serialization diff --git a/python/doc/source/memory.rst b/docs/source/python/memory.rst similarity index 92% rename from python/doc/source/memory.rst rename to docs/source/python/memory.rst index 1ee81e754da1c..ba66807b38a8e 100644 --- a/python/doc/source/memory.rst +++ b/docs/source/python/memory.rst @@ -35,8 +35,8 @@ Referencing and Allocating Memory

pyarrow.Buffer
--------------

-The :class:`~pyarrow.Buffer` object wraps the C++ ``arrow::Buffer`` type and is
-the primary tool for memory management in Apache Arrow in C++. It permits
+The :class:`Buffer` object wraps the C++ :cpp:class:`arrow::Buffer` type
+which is the primary tool for memory management in Apache Arrow in C++. It permits
higher-level array classes to safely interact with memory which they may or may
not own. ``arrow::Buffer`` can be zero-copy sliced to permit Buffers to cheaply
reference other Buffers, while preserving memory lifetime and clean

@@ -46,8 +46,9 @@ There are many implementations of ``arrow::Buffer``, but they all provide a
standard interface: a data pointer and length. This is similar to Python's
built-in `buffer protocol` and ``memoryview`` objects.

-A :class:`~pyarrow.Buffer` can be created from any Python object which
-implements the buffer protocol. Let's consider a bytes object:
+A :class:`Buffer` can be created from any Python object implementing
+the buffer protocol by calling the :func:`py_buffer` function. Let's consider
+a bytes object:

.. ipython:: python

@@ -61,18 +62,22 @@ implements the buffer protocol. Let's consider a bytes object:
Creating a Buffer in this way does not allocate any memory; it is a zero-copy
view on the memory exported from the ``data`` bytes object.

-The Buffer's ``to_pybytes`` method can convert to a Python byte string:
+External memory, in the form of a raw pointer and size, can also be
+referenced using the :func:`foreign_buffer` function.
+
+Buffers can be used in circumstances where a Python buffer or memoryview is
+required, and such conversions are zero-copy:

.. ipython:: python

-   buf.to_pybytes()
+   memoryview(buf)

-Buffers can be used in circumstances where a Python buffer or memoryview is
-required, and such conversions are also zero-copy:
+The Buffer's :meth:`~Buffer.to_pybytes` method converts the Buffer's data to a
+Python bytestring (thus making a copy of the data):

.. ipython:: python

-   memoryview(buf)
+   buf.to_pybytes()

Memory Pools
------------

@@ -104,6 +109,9 @@ the buffer is garbage-collected, all of the memory is freed:

   buf = None
   pa.total_allocated_bytes()

+..
seealso::
+   On-GPU buffers using Arrow's optional :doc:`CUDA integration <cuda>`.
+
Input and Output
================ diff --git a/python/doc/source/numpy.rst b/docs/source/python/numpy.rst similarity index 97% rename from python/doc/source/numpy.rst rename to docs/source/python/numpy.rst index 303e1823851fd..870f9cb734792 100644 --- a/python/doc/source/numpy.rst +++ b/docs/source/python/numpy.rst @@ -17,8 +17,8 @@

.. _numpy_interop:

-Using PyArrow with NumPy
-========================
+NumPy Integration
+=================

PyArrow allows converting back and forth from `NumPy <https://www.numpy.org/>`_ arrays
to Arrow :ref:`Arrays `. diff --git a/python/doc/source/pandas.rst b/docs/source/python/pandas.rst similarity index 82% rename from python/doc/source/pandas.rst rename to docs/source/python/pandas.rst index 6ade17185a2c9..dbc5e77e83bff 100644 --- a/python/doc/source/pandas.rst +++ b/docs/source/python/pandas.rst @@ -17,8 +17,8 @@

.. _pandas_interop:

-Using PyArrow with pandas
-=========================
+Pandas Integration
+==================

To interface with `pandas <https://pandas.pydata.org/>`_, PyArrow provides
various conversion routines to consume pandas structures and convert back
to them.

@@ -29,6 +29,13 @@ to them.
   (such as a different type system, and support for null values) that this
   is a separate topic from :ref:`numpy_interop`.

+To follow examples in this document, make sure to run:
+
+.. ipython:: python
+
+   import pandas as pd
+   import pyarrow as pa
+
DataFrames
----------

@@ -120,5 +127,64 @@ Arrow -> pandas Conversion
+-------------------------------------+--------------------------------------------------------+
| ``TIMESTAMP(unit=*)``               | ``pd.Timestamp`` (``np.datetime64[ns]``)               |
+-------------------------------------+--------------------------------------------------------+
-| ``DATE``                            | ``pd.Timestamp`` (``np.datetime64[ns]``)               |
+| ``DATE``                            | ``object`` (with ``datetime.date`` objects)            |
+-------------------------------------+--------------------------------------------------------+
+
+Categorical types
+~~~~~~~~~~~~~~~~~
+
+TODO
+
+Datetime (Timestamp) types
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+TODO
+
+Date types
+~~~~~~~~~~
+
+While dates can be handled using the ``datetime64[ns]`` type in
+pandas, some systems work with object arrays of Python's built-in
+``datetime.date`` object:
+
+.. ipython:: python
+
+   from datetime import date
+   s = pd.Series([date(2018, 12, 31), None, date(2000, 1, 1)])
+   s
+
+When converting to an Arrow array, the ``date32`` type will be used by
+default:
+
+.. ipython:: python
+
+   arr = pa.array(s)
+   arr.type
+   arr[0]
+
+To use the 64-bit ``date64``, specify this explicitly:
+
+.. ipython:: python
+
+   arr = pa.array(s, type='date64')
+   arr.type
+
+When converting back with ``to_pandas``, object arrays of
+``datetime.date`` objects are returned:
+
+.. ipython:: python
+
+   arr.to_pandas()
+
+If you want to use NumPy's ``datetime64`` dtype instead, pass
+``date_as_object=False``:
+
+.. ipython:: python
+
+   s2 = pd.Series(arr.to_pandas(date_as_object=False))
+   s2.dtype
+
+Time types
+~~~~~~~~~~
+
+TODO diff --git a/python/doc/source/parquet.rst b/docs/source/python/parquet.rst similarity index 100% rename from python/doc/source/parquet.rst rename to docs/source/python/parquet.rst diff --git a/python/doc/source/plasma.rst b/docs/source/python/plasma.rst similarity index 98% rename from python/doc/source/plasma.rst rename to docs/source/python/plasma.rst index 09837cf6e9ef9..660c5fbba7918 100644 --- a/python/doc/source/plasma.rst +++ b/docs/source/python/plasma.rst @@ -60,7 +60,7 @@ socket name: ..
code-block:: python

   import pyarrow.plasma as plasma
-   client = plasma.connect("/tmp/plasma", "", 0)
+   client = plasma.connect("/tmp/plasma")

If the following error occurs from running the above Python code, that means
that either the socket given is incorrect, or the ``./plasma_store`` is
@@ -68,7 +68,7 @@ not currently running. Check to see if the Plasma store is still running.

.. code-block:: shell

-   >>> client = plasma.connect("/tmp/plasma", "", 0)
+   >>> client = plasma.connect("/tmp/plasma")
   Connection to socket failed for pathname /tmp/plasma
   Could not connect to socket /tmp/plasma

@@ -179,7 +179,7 @@ the object buffer.

   # Create a different client. Note that this second client could be
   # created in the same or in a separate, concurrent Python session.
-   client2 = plasma.connect("/tmp/plasma", "", 0)
+   client2 = plasma.connect("/tmp/plasma")

   # Get the object in the second client. This blocks until the object has been sealed.
   object_id2 = plasma.ObjectID(20 * b"a")

@@ -221,7 +221,7 @@ of the object info might change in the future):

   import pyarrow.plasma as plasma
   import time

-   client = plasma.connect("/tmp/plasma", "", 0)
+   client = plasma.connect("/tmp/plasma")

   client.put("hello, world")
   # Sleep a little so we get different creation times

@@ -452,7 +452,7 @@ You can test this with the following script:

   import pyarrow.plasma as plasma
   import time

-   client = plasma.connect("/tmp/plasma", "", 0)
+   client = plasma.connect("/tmp/plasma")

   data = np.random.randn(100000000)
   tensor = pa.Tensor.from_numpy(data) diff --git a/format/Guidelines.md b/format/Guidelines.md deleted file mode 100644 index 7b5f3a11bfc48..0000000000000 --- a/format/Guidelines.md +++ /dev/null @@ -1,35 +0,0 @@ -
-# Implementation guidelines
-
-An execution engine (or framework, or UDF executor, or storage engine, etc) can implement only a subset of the Arrow spec and/or extend it given the following constraints:
-
-## Implementing a subset of the spec
-### If only producing (and not consuming) arrow vectors.
-Any subset of the vector spec and the corresponding metadata can be implemented.
-
-### If consuming and producing vectors
-There is a minimal subset of vectors to be supported.
-Production of a subset of vectors and their corresponding metadata is always fine.
-Consumption of vectors should at least convert the unsupported input vectors to the supported subset (for example Timestamp.millis to Timestamp.micros or int32 to int64)
-
-## Extensibility
-An execution engine implementor can also extend their memory representation with their own vectors internally as long as they are never exposed. Before sending data to another system expecting Arrow data these custom vectors should be converted to a type that exists in the Arrow spec.
-An example of this is operating on compressed data.
-These custom vectors are not exchanged externally and there is no support for custom metadata.
diff --git a/format/IPC.md b/format/IPC.md deleted file mode 100644 index 97c1790e67ea2..0000000000000 --- a/format/IPC.md +++ /dev/null @@ -1,253 +0,0 @@ -
-# Interprocess messaging / communication (IPC)
-
-## Encapsulated message format
-
-Data components in the stream and file formats are represented as encapsulated
-*messages* consisting of:
-
-* A length prefix indicating the metadata size
-* The message metadata as a [Flatbuffer][3]
-* Padding bytes to an 8-byte boundary
-* The message body, which must be a multiple of 8 bytes
-
-Schematically, we have:
-
-```
-<metadata_size: int32>
-<metadata_flatbuffer: bytes>
-<padding>
-<message body>
-```
-
-The complete serialized message must be a multiple of 8 bytes so that messages
-can be relocated between streams. Otherwise the amount of padding between the
-metadata and the message body could be non-deterministic.
-
-The `metadata_size` includes the size of the flatbuffer plus padding. The
-`Message` flatbuffer includes a version number, the particular message (as a
-flatbuffer union), and the size of the message body:
-
-```
-table Message {
-  version: org.apache.arrow.flatbuf.MetadataVersion;
-  header: MessageHeader;
-  bodyLength: long;
-}
-```
-
-Currently, we support 4 types of messages:
-
-* Schema
-* RecordBatch
-* DictionaryBatch
-* Tensor
-
-## Streaming format
-
-We provide a streaming format for record batches. It is presented as a sequence
-of encapsulated messages, each of which follows the format above. The schema
-comes first in the stream, and it is the same for all of the record batches
-that follow. If any fields in the schema are dictionary-encoded, one or more
-`DictionaryBatch` messages will be included. `DictionaryBatch` and
-`RecordBatch` messages may be interleaved, but before any dictionary key is used
-in a `RecordBatch` it should be defined in a `DictionaryBatch`.
-
-```
-<SCHEMA>
-<DICTIONARY 0>
-...
-<DICTIONARY k - 1>
-<RECORD BATCH 0>
-...
-<DICTIONARY x DELTA>
-...
-<DICTIONARY y DELTA>
-...
-<RECORD BATCH n - 1>
-<EOS [optional]: int32>
-```
-
-When a stream reader implementation is reading a stream, after each message, it
-may read the next 4 bytes to know how large the message metadata that follows
-is. Once the message flatbuffer is read, it can then read the message body.
-
-The stream writer can signal end-of-stream (EOS) either by writing a 0 length
-as an `int32` or simply closing the stream interface.
-
-## File format
-
-We define a "file format" supporting random access in a very similar format to
-the streaming format. The file starts and ends with a magic string `ARROW1`
-(plus padding). What follows in the file is identical to the stream format. At
-the end of the file, we write a *footer* containing a redundant copy of the
-schema (which is a part of the streaming format) plus memory offsets and sizes
-for each of the data blocks in the file. This enables random access to any
-record batch in the file. See [format/File.fbs][1] for the precise details of
-the file footer.
-
-Schematically we have:
-
-```
-<magic number "ARROW1">
-<empty padding bytes [to 8 byte boundary]>
-<STREAMING FORMAT>
-<FOOTER>
-<FOOTER SIZE: int32>
-<magic number "ARROW1">
-```
-
-In the file format, there is no requirement that dictionary keys should be
-defined in a `DictionaryBatch` before they are used in a `RecordBatch`, as long
-as the keys are defined somewhere in the file.
-
-### RecordBatch body structure
-
-The `RecordBatch` metadata contains a depth-first (pre-order) flattened set of
-field metadata and physical memory buffers (some comments from [Message.fbs][2]
-have been shortened / removed):
-
-```
-table RecordBatch {
-  length: long;
-  nodes: [FieldNode];
-  buffers: [Buffer];
-}
-
-struct FieldNode {
-  length: long;
-  null_count: long;
-}
-
-struct Buffer {
-  /// The relative offset into the shared memory page where the bytes for this
-  /// buffer starts
-  offset: long;
-
-  /// The absolute length (in bytes) of the memory buffer. The memory is found
-  /// from offset (inclusive) to offset + length (non-inclusive).
-  length: long;
-}
-```
-
-In the context of a file, the `page` is not used, and the `Buffer` offsets use
-as a frame of reference the start of the message body. So, while in a general
-IPC setting these offsets may be anyplace in one or more shared memory regions,
-in the file format the offsets start from 0.
-
-The location of a record batch and the size of the metadata block as well as
-the body of buffers is stored in the file footer:
-
-```
-struct Block {
-  offset: long;
-  metaDataLength: int;
-  bodyLength: long;
-}
-```
-
-The `metaDataLength` here includes the metadata length prefix, serialized
-metadata, and any additional padding bytes, and by construction must be a
-multiple of 8 bytes.
-
-Some notes about this
-
-* The `Block` offset indicates the starting byte of the record batch.
-* The metadata length includes the flatbuffer size, the record batch metadata
-  flatbuffer, and any padding bytes
-
-### Dictionary Batches
-
-Dictionaries are written in the stream and file formats as a sequence of record
-batches, each having a single field. The complete semantic schema for a
-sequence of record batches, therefore, consists of the schema along with all of
-the dictionaries. The dictionary types are found in the schema, so it is
-necessary to read the schema to first determine the dictionary types so that
-the dictionaries can be properly interpreted.
-
-```
-table DictionaryBatch {
-  id: long;
-  data: RecordBatch;
-  isDelta: boolean = false;
-}
-```
-
-The dictionary `id` in the message metadata can be referenced one or more times
-in the schema, so that dictionaries can even be used for multiple fields. See
-the [Physical Layout][4] document for more about the semantics of
-dictionary-encoded data.
-
-The dictionary `isDelta` flag allows dictionary batches to be modified
-mid-stream. A dictionary batch with `isDelta` set indicates that its vector
-should be concatenated with those of any previous batches with the same `id`. A
-stream which encodes one column, the list of strings
-`["A", "B", "C", "B", "D", "C", "E", "A"]`, with a delta dictionary batch could
-take the form:
-
-```
-<SCHEMA>
-<DICTIONARY 0>
-(0) "A"
-(1) "B"
-(2) "C"
-<RECORD BATCH 0>
-0
-1
-2
-1
-<DICTIONARY 0 DELTA>
-(3) "D"
-(4) "E"
-<RECORD BATCH 1>
-3
-2
-4
-0
-EOS
-```
-
-### Tensor (Multi-dimensional Array) Message Format
-
-The `Tensor` message type provides a way to write a multidimensional array of
-fixed-size values (such as a NumPy ndarray) using Arrow's shared memory
-tools. Arrow implementations in general are not required to implement this data
-format, though we provide a reference implementation in C++.
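As a point of reference, the C++-backed Python implementation exposes this message type through `pyarrow.Tensor`; a minimal sketch (assuming a pyarrow build with NumPy available):

```python
import numpy as np
import pyarrow as pa

# Wrap an ndarray as an Arrow Tensor; no data is copied.
tensor = pa.Tensor.from_numpy(np.arange(6, dtype=np.int64).reshape(2, 3))
print(tensor.shape)    # (2, 3)
print(tensor.strides)  # byte strides of the underlying data
```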
- -When writing a standalone encapsulated tensor message, we use the format as
-indicated above, but additionally align the starting offset of the metadata as
-well as the starting offset of the tensor body (if writing to a shared memory
-region) to be multiples of 64 bytes:
-
-```
-<PADDING>
-<metadata size: int32>
-<metadata>
-<tensor body>
-```
-
-[1]: https://github.com/apache/arrow/blob/master/format/File.fbs
-[2]: https://github.com/apache/arrow/blob/master/format/Message.fbs
-[3]: https://github.com/google/flatbuffers
-[4]: https://github.com/apache/arrow/blob/master/format/Layout.md diff --git a/format/Layout.md b/format/Layout.md deleted file mode 100644 index 80af1d3d37a3b..0000000000000 --- a/format/Layout.md +++ /dev/null @@ -1,664 +0,0 @@ -
-# Arrow: Physical memory layout
-
-## Definitions / Terminology
-
-Since different projects have used different words to describe various
-concepts, here is a small glossary to help disambiguate.
-
-* Array: a sequence of values with known length all having the same type.
-* Slot or array slot: a single logical value in an array of some particular data type
-* Contiguous memory region: a sequential virtual address space with a given
-  length. Any byte can be reached via a single pointer offset less than the
-  region's length.
-* Contiguous memory buffer: A contiguous memory region that stores
-  a multi-value component of an Array. Sometimes referred to as just "buffer".
-* Primitive type: a data type that occupies a fixed-size memory slot specified
-  in bit width or byte width
-* Nested or parametric type: a data type whose full structure depends on one or
-  more other child relative types. Two fully-specified nested types are equal
-  if and only if their child types are equal. For example, `List<U>` is distinct
-  from `List<V>` iff U and V are different relative types.
-* Relative type or simply type (unqualified): either a specific primitive type
-  or a fully-specified nested type. When we say slot we mean a relative type
-  value, not necessarily any physical storage region.
-* Logical type: A data type that is implemented using some relative (physical)
-  type. For example, Decimal values are stored as 16 bytes in a fixed byte
-  size array. Similarly, strings can be stored as `List<1-byte>`.
-* Parent and child arrays: names to express relationships between physical
-  value arrays in a nested type structure. For example, a `List<T>`-type parent
-  array has a T-type array as its child (see more on lists below).
-* Leaf node or leaf: A primitive value array that may or may not be a child
-  array of some array with a nested type.
-
-## Requirements, goals, and non-goals
-
-Base requirements
-
-* A physical memory layout enabling zero-deserialization data interchange
-  amongst a variety of systems handling flat and nested columnar data, including
-  such systems as Spark, Drill, Impala, Kudu, Ibis, ODBC protocols, and
-  proprietary systems that utilize the open source components.
-* All array slots are accessible in constant time, with complexity growing
-  linearly in the nesting level
-* Capable of representing fully-materialized and decoded / decompressed [Parquet][5]
-  data
-* It is required to have all the contiguous memory buffers in an IPC payload
-  aligned at 8-byte boundaries. In other words, each buffer must start at
-  an aligned 8-byte offset.
-* The general recommendation is to align the buffers at a 64-byte boundary, but
-  this is not absolutely necessary.
-* Any relative type can have null slots
-* Arrays are immutable once created.
Implementations can provide APIs to mutate - an array, but applying mutations will require a new array data structure to - be built. -* Arrays are relocatable (e.g. for RPC/transient storage) without pointer - swizzling. Another way of putting this is that contiguous memory regions can - be migrated to a different address space (e.g. via a memcpy-type of - operation) without altering their contents. - -## Goals (for this document) - -* To describe relative types (physical value types and a preliminary set of - nested types) sufficient for an unambiguous implementation -* Memory layout and random access patterns for each relative type -* Null value representation - -## Non-goals (for this document) - -* To enumerate or specify logical types that can be implemented as primitive - (fixed-width) value types. For example: signed and unsigned integers, - floating point numbers, boolean, exact decimals, date and time types, - CHAR(K), VARCHAR(K), etc. -* To specify standardized metadata or a data layout for RPC or transient file - storage. -* To define a selection or masking vector construct -* Implementation-specific details -* Details of a user or developer C/C++/Java API. -* Any "table" structure composed of named arrays each having their own type or - any other structure that composes arrays. -* Any memory management or reference counting subsystem -* To enumerate or specify types of encodings or compression support - -## Byte Order ([Endianness][3]) - -The Arrow format is little endian by default. -The Schema metadata has an endianness field indicating endianness of RecordBatches. -Typically this is the endianness of the system where the RecordBatch was generated. -The main use case is exchanging RecordBatches between systems with the same Endianness. -At first we will return an error when trying to read a Schema with an endianness -that does not match the underlying system. The reference implementation is focused on -Little Endian and provides tests for it. Eventually we may provide automatic conversion -via byte swapping. - -## Alignment and Padding - -As noted above, all buffers must be aligned in memory at 8-byte boundaries and padded -to a length that is a multiple of 8 bytes. The alignment requirement follows best -practices for optimized memory access: - -* Elements in numeric arrays will be guaranteed to be retrieved via aligned access. -* On some architectures alignment can help limit partially used cache lines. -* 64 byte alignment is recommended by the [Intel performance guide][2] for - data-structures over 64 bytes (which will be a common case for Arrow Arrays). - -Recommending padding to a multiple of 64 bytes allows for using [SIMD][4] instructions -consistently in loops without additional conditional checks. -This should allow for simpler, efficient and CPU cache-friendly code. -The specific padding length was chosen because it matches the largest known -SIMD instruction registers available as of April 2016 (Intel AVX-512). In other -words, we can load the entire 64-byte buffer into a 512-bit wide SIMD register -and get data-level parallelism on all the columnar values packed into the 64-byte -buffer. Guaranteed padding can also allow certain compilers -to generate more optimized code directly (e.g. One can safely use Intel's -`-qopt-assume-safe-padding`). - -Unless otherwise noted, padded bytes do not need to have a specific value. - -## Array lengths - -Array lengths are represented in the Arrow metadata as a 64-bit signed -integer. 
An implementation of Arrow is considered valid even if it only
-supports lengths up to the maximum 32-bit signed integer, though. If using
-Arrow in a multi-language environment, we recommend limiting lengths to
-2^31 - 1 elements or less. Larger data sets can be represented using
-multiple array chunks.
-
-## Null count
-
-The number of null value slots is a property of the physical array and
-considered part of the data structure. The null count is represented in the
-Arrow metadata as a 64-bit signed integer, as it may be as large as the array
-length.
-
-## Null bitmaps
-
-Any relative type can have null value slots, whether primitive or nested type.
-
-An array with nulls must have a contiguous memory buffer, known as the null (or
-validity) bitmap, whose length is a multiple of 64 bytes (as discussed above)
-and large enough to have at least 1 bit for each array
-slot.
-
-Whether any array slot is valid (non-null) is encoded in the respective bits of
-this bitmap. A 1 (set bit) for index `j` indicates that the value is not null,
-while a 0 (bit not set) indicates that it is null. Bitmaps are to be
-initialized to be all unset at allocation time (this includes padding).
-
-```
-is_valid[j] -> bitmap[j / 8] & (1 << (j % 8))
-```
-
-We use [least-significant bit (LSB) numbering][1] (also known as
-bit-endianness). This means that within a group of 8 bits, we read
-right-to-left:
-
-```
-values = [0, 1, null, 2, null, 3]
-
-bitmap
-j mod 8   7  6  5  4  3  2  1  0
-          0  0  1  0  1  0  1  1
-```
-
-Arrays having a 0 null count may choose to not allocate the null
-bitmap. Implementations may choose to always allocate one anyway as a matter of
-convenience, but this should be noted when memory is being shared.
-
-Nested type arrays have their own null bitmap and null count regardless of
-the null count and null bits of their child arrays.
-
-## Primitive value arrays
-
-A primitive value array represents a fixed-length array of values each having
-the same physical slot width typically measured in bytes, though the spec also
-provides for bit-packed types (e.g. boolean values encoded in bits).
-
-Internally, the array contains a contiguous memory buffer whose total size is
-equal to the slot width multiplied by the array length. For bit-packed types,
-the size is rounded up to the nearest byte.
-
-The associated null bitmap is contiguously allocated (as described above) but
-does not need to be adjacent in memory to the values buffer.
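To make the bit-numbering concrete, here is a small illustrative Python sketch of the validity check above (not part of the specification):

```python
def is_valid(bitmap: bytes, j: int) -> bool:
    # Least-significant-bit numbering: bit (j % 8) of byte (j // 8).
    return (bitmap[j // 8] & (1 << (j % 8))) != 0

# values = [0, 1, null, 2, null, 3] -> validity byte 0b00101011
bitmap = bytes([0b00101011])
print([is_valid(bitmap, j) for j in range(6)])
# [True, True, False, True, False, True]
```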
- - -### Example Layout: Int32 Array
-For example a primitive array of int32s:
-
-[1, null, 2, 4, 8]
-
-Would look like:
-
-```
-* Length: 5, Null count: 1
-* Null bitmap buffer:
-
-  |Byte 0 (validity bitmap) | Bytes 1-63            |
-  |-------------------------|-----------------------|
-  | 00011101                | 0 (padding)           |
-
-* Value Buffer:
-
-  |Bytes 0-3   | Bytes 4-7   | Bytes 8-11  | Bytes 12-15 | Bytes 16-19 | Bytes 20-63 |
-  |------------|-------------|-------------|-------------|-------------|-------------|
-  | 1          | unspecified | 2           | 4           | 8           | unspecified |
-```
-
-### Example Layout: Non-null int32 Array
-
-[1, 2, 3, 4, 8] has two possible layouts:
-
-```
-* Length: 5, Null count: 0
-* Null bitmap buffer:
-
-  | Byte 0 (validity bitmap) | Bytes 1-63            |
-  |--------------------------|-----------------------|
-  | 00011111                 | 0 (padding)           |
-
-* Value Buffer:
-
-  |Bytes 0-3   | Bytes 4-7   | Bytes 8-11  | bytes 12-15 | bytes 16-19 | Bytes 20-63 |
-  |------------|-------------|-------------|-------------|-------------|-------------|
-  | 1          | 2           | 3           | 4           | 8           | unspecified |
-```
-
-or with the bitmap elided:
-
-```
-* Length 5, Null count: 0
-* Null bitmap buffer: Not required
-* Value Buffer:
-
-  |Bytes 0-3   | Bytes 4-7   | Bytes 8-11  | bytes 12-15 | bytes 16-19 | Bytes 20-63 |
-  |------------|-------------|-------------|-------------|-------------|-------------|
-  | 1          | 2           | 3           | 4           | 8           | unspecified |
-```
-
-## List type
-
-List is a nested type in which each array slot contains a variable-size
-sequence of values all having the same relative type (heterogeneity can be
-achieved through unions, described later).
-
-A list type is specified like `List<T>`, where `T` is any relative type
-(primitive or nested).
-
-A list-array is represented by the combination of the following:
-
-* A values array, a child array of type T. T may also be a nested type.
-* An offsets buffer containing 32-bit signed integers with length equal to the
-  length of the top-level array plus one. Note that this limits the size of the
-  values array to 2^31 - 1.
-
-The offsets array encodes a start position in the values array, and the length
-of the value in each slot is computed using the first difference with the next
-element in the offsets array. For example, the position and length of slot j is
-computed as:
-
-```
-slot_position = offsets[j]
-slot_length = offsets[j + 1] - offsets[j]  // (for 0 <= j < length)
-```
-
-The first value in the offsets array is 0, and the last element is the length
-of the values array.
-
-### Example Layout: `List<Char>` Array
-Let's consider an example, the type `List<Char>`, where Char is a 1-byte
-logical type.
- -For an array of length 4 with respective values:
-
-[['j', 'o', 'e'], null, ['m', 'a', 'r', 'k'], []]
-
-will have the following representation:
-
-```
-* Length: 4, Null count: 1
-* Null bitmap buffer:
-
-  | Byte 0 (validity bitmap) | Bytes 1-63            |
-  |--------------------------|-----------------------|
-  | 00001101                 | 0 (padding)           |
-
-* Offsets buffer (int32)
-
-  | Bytes 0-3  | Bytes 4-7   | Bytes 8-11  | Bytes 12-15 | Bytes 16-19 | Bytes 20-63 |
-  |------------|-------------|-------------|-------------|-------------|-------------|
-  | 0          | 3           | 3           | 7           | 7           | unspecified |
-
-* Values array (char array):
-  * Length: 7,  Null count: 0
-  * Null bitmap buffer: Not required
-
-    | Bytes 0-6  | Bytes 7-63  |
-    |------------|-------------|
-    | joemark    | unspecified |
-```
-
-### Example Layout: `List<List<byte>>`
-[[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], [[9, 10]]]
-
-will be represented as follows:
-
-```
-* Length 3
-* Nulls count: 0
-* Null bitmap buffer: Not required
-* Offsets buffer (int32)
-
-  | Bytes 0-3  | Bytes 4-7  | Bytes 8-11 | Bytes 12-15 | Bytes 16-63 |
-  |------------|------------|------------|-------------|-------------|
-  | 0          |  2         |  5         |  6          | unspecified |
-
-* Values array (`List<byte>`)
-  * Length: 6, Null count: 1
-  * Null bitmap buffer:
-
-    | Byte 0 (validity bitmap) | Bytes 1-63  |
-    |--------------------------|-------------|
-    | 00110111                 | 0 (padding) |
-
-  * Offsets buffer (int32)
-
-    | Bytes 0-27           | Bytes 28-63 |
-    |----------------------|-------------|
-    | 0, 2, 4, 7, 7, 8, 10 | unspecified |
-
-  * Values array (bytes):
-    * Length: 10, Null count: 0
-    * Null bitmap buffer: Not required
-
-      | Bytes 0-9                     | Bytes 10-63 |
-      |-------------------------------|-------------|
-      | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 | unspecified |
-```
-
-## Struct type
-
-A struct is a nested type parameterized by an ordered sequence of relative
-types (which can all be distinct), called its fields.
-
-Typically the fields have names, but the names and their types are part of the
-type metadata, not the physical memory layout.
-
-A struct array does not have any additional allocated physical storage for its values.
-A struct array must still have an allocated null bitmap, if it has one or more null values.
-
-Physically, a struct type has one child array for each field. The child arrays are independent and need not be adjacent to each other in memory.
-
-For example, the struct (field names shown here as strings for illustration
-purposes)
-
-```
-Struct <
-  name: String (= List<char>),
-  age: Int32
->
-```
-
-has two child arrays, one List<char> array (layout as above) and one 4-byte
-primitive value array having Int32 logical type.
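The slot-level semantics can be sketched in Python (an illustration of the model, not an implementation): each child array holds the values for one field, and the parent validity bitmap has the final word on nullness.

```python
# Children of Struct<name: String, age: Int32>, expanded for clarity.
name_child = [b"joe", None, None, b"mark"]
age_child = [1, 2, None, 4]
parent_valid = [True, True, False, True]

def struct_slot(j):
    # The parent null bitmap is authoritative for the whole slot.
    if not parent_valid[j]:
        return None
    return {"name": name_child[j], "age": age_child[j]}

print([struct_slot(j) for j in range(4)])
# [{'name': b'joe', 'age': 1}, {'name': None, 'age': 2}, None,
#  {'name': b'mark', 'age': 4}]
```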
- -### Example Layout: `Struct<List<Char>, Int32>`:
-The layout for [{'joe', 1}, {null, 2}, null, {'mark', 4}] would be:
-
-```
-* Length: 4, Null count: 1
-* Null bitmap buffer:
-
-  |Byte 0 (validity bitmap) | Bytes 1-63            |
-  |-------------------------|-----------------------|
-  | 00001011                | 0 (padding)           |
-
-* Children arrays:
-  * field-0 array (`List<char>`):
-    * Length: 4, Null count: 2
-    * Null bitmap buffer:
-
-      | Byte 0 (validity bitmap) | Bytes 1-63            |
-      |--------------------------|-----------------------|
-      | 00001001                 | 0 (padding)           |
-
-    * Offsets buffer:
-
-      | Bytes 0-19     |
-      |----------------|
-      | 0, 3, 3, 3, 7  |
-
-     * Values array:
-       * Length: 7, Null count: 0
-       * Null bitmap buffer: Not required
-
-       * Value buffer:
-
-         | Bytes 0-6      |
-         |----------------|
-         | joemark        |
-
-  * field-1 array (int32 array):
-    * Length: 4, Null count: 1
-    * Null bitmap buffer:
-
-      | Byte 0 (validity bitmap) | Bytes 1-63            |
-      |--------------------------|-----------------------|
-      | 00001011                 | 0 (padding)           |
-
-    * Value Buffer:
-
-      |Bytes 0-3   | Bytes 4-7   | Bytes 8-11  | Bytes 12-15 | Bytes 16-63 |
-      |------------|-------------|-------------|-------------|-------------|
-      | 1          | 2           | unspecified | 4           | unspecified |
-
-```
-
-While a struct does not have physical storage for each of its semantic slots
-(i.e. each scalar C-like struct), an entire struct slot can be set to null via
-the null bitmap. Any of the child field arrays can have null values according
-to their respective independent null bitmaps.
-This implies that for a particular struct slot the null bitmap for the struct
-array might indicate a null slot when one or more of its child arrays has a
-non-null value in their corresponding slot. When reading the struct array the
-parent null bitmap is authoritative.
-This is illustrated in the example above: the child arrays have valid entries
-for the null struct but are 'hidden' from the consumer by the parent array's
-null bitmap. However, when treated independently, the corresponding
-values of the child arrays will be non-null.
-
-## Dense union type
-
-A dense union is semantically similar to a struct, and contains an ordered
-sequence of relative types. While a struct contains multiple arrays, a union is
-semantically a single array in which each slot can have a different type.
-
-The union types may be named, but like structs this will be a matter of the
-metadata and will not affect the physical memory layout.
-
-We define two distinct union types that are optimized for different use
-cases. The first, the dense union, represents a mixed-type array with 5 bytes
-of overhead for each value. Its physical layout is as follows:
-
-* One child array for each relative type
-* Types buffer: A buffer of 8-bit signed integers, enumerated from 0 corresponding
-  to each type. A union with more than 127 possible types can be modeled as a
-  union of unions.
-* Offsets buffer: A buffer of signed int32 values indicating the relative offset
-  into the respective child array for the type in a given slot. The respective
-  offsets for each child value array must be in order / increasing.
- -Critically, the dense union allows for minimal overhead in the ubiquitous
-union-of-structs with non-overlapping-fields use case (`Union<s1: Struct1, s2: Struct2, s3: Struct3, ...>`)
-
-### Example Layout: Dense union
-
-An example layout for logical union of:
-`Union<f: float, i: int32>` having the values:
-[{f=1.2}, null, {f=3.4}, {i=5}]
-
-```
-* Length: 4, Null count: 1
-* Null bitmap buffer:
-  |Byte 0 (validity bitmap) | Bytes 1-63            |
-  |-------------------------|-----------------------|
-  |00001101                 | 0 (padding)           |
-
-* Types buffer:
-
-  |Byte 0   | Byte 1      | Byte 2   | Byte 3   | Bytes 4-63  |
-  |---------|-------------|----------|----------|-------------|
-  | 0       | unspecified | 0        | 1        | unspecified |
-
-* Offset buffer:
-
-  |Byte 0-3 | Byte 4-7    | Byte 8-11 | Byte 12-15 | Bytes 16-63 |
-  |---------|-------------|-----------|------------|-------------|
-  | 0       | unspecified | 1         | 0          | unspecified |
-
-* Children arrays:
-  * Field-0 array (f: float):
-    * Length: 2, nulls: 0
-    * Null bitmap buffer: Not required
-
-    * Value Buffer:
-
-      | Bytes 0-7 | Bytes 8-63  |
-      |-----------|-------------|
-      | 1.2, 3.4  | unspecified |
-
-
-  * Field-1 array (i: int32):
-    * Length: 1, nulls: 0
-    * Null bitmap buffer: Not required
-
-    * Value Buffer:
-
-      | Bytes 0-3 | Bytes 4-63  |
-      |-----------|-------------|
-      | 5         | unspecified |
-```
-
-## Sparse union type
-
-A sparse union has the same structure as a dense union, with the omission of
-the offsets array. In this case, the child arrays are each equal in length to
-the length of the union.
-
-While a sparse union may use significantly more space compared with a dense
-union, it has some advantages that may be desirable in certain use cases:
-
-* A sparse union is more amenable to vectorized expression evaluation in some use cases.
-* Equal-length arrays can be interpreted as a union by only defining the types array.
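The difference between the two layouts can be summarized by how a reader resolves slot `j` (illustrative Python, not an implementation):

```python
def dense_union_slot(types, offsets, children, j):
    # Dense: indirect through the offsets buffer into a compact child.
    return children[types[j]][offsets[j]]

def sparse_union_slot(types, children, j):
    # Sparse: children are full length, so the slot index is reused.
    return children[types[j]][j]
```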
- -### Example layout: `SparseUnion<u0: Int32, u1: float, u2: List<char>>`
-
-For the union array:
-
-[{u0=5}, {u1=1.2}, {u2='joe'}, {u1=3.4}, {u0=4}, {u2='mark'}]
-
-will have the following layout:
-```
-* Length: 6, Null count: 0
-* Null bitmap buffer: Not required
-
-* Types buffer:
-
-  | Byte 0     | Byte 1      | Byte 2      | Byte 3      | Byte 4      | Byte 5       | Bytes 6-63            |
-  |------------|-------------|-------------|-------------|-------------|--------------|-----------------------|
-  | 0          | 1           | 2           | 1           | 0           | 2            | unspecified (padding) |
-
-* Children arrays:
-
-  * u0 (Int32):
-    * Length: 6, Null count: 4
-    * Null bitmap buffer:
-
-      |Byte 0 (validity bitmap) | Bytes 1-63            |
-      |-------------------------|-----------------------|
-      |00010001                 | 0 (padding)           |
-
-    * Value buffer:
-
-      |Bytes 0-3   | Bytes 4-7   | Bytes 8-11  | Bytes 12-15 | Bytes 16-19 | Bytes 20-23  | Bytes 24-63           |
-      |------------|-------------|-------------|-------------|-------------|--------------|-----------------------|
-      | 5          | unspecified | unspecified | unspecified | 4           | unspecified  | unspecified (padding) |
-
-  * u1 (float):
-    * Length: 6, Null count: 4
-    * Null bitmap buffer:
-
-      |Byte 0 (validity bitmap) | Bytes 1-63            |
-      |-------------------------|-----------------------|
-      | 00001010                | 0 (padding)           |
-
-    * Value buffer:
-
-      |Bytes 0-3    | Bytes 4-7   | Bytes 8-11  | Bytes 12-15 | Bytes 16-19 | Bytes 20-23  | Bytes 24-63           |
-      |-------------|-------------|-------------|-------------|-------------|--------------|-----------------------|
-      | unspecified | 1.2         | unspecified | 3.4         | unspecified | unspecified  | unspecified (padding) |
-
-  * u2 (`List<char>`)
-    * Length: 6, Null count: 4
-    * Null bitmap buffer:
-
-      | Byte 0 (validity bitmap) | Bytes 1-63            |
-      |--------------------------|-----------------------|
-      | 00100100                 | 0 (padding)           |
-
-    * Offsets buffer (int32)
-
-      | Bytes 0-3  | Bytes 4-7   | Bytes 8-11  | Bytes 12-15 | Bytes 16-19 | Bytes 20-23 | Bytes 24-27 | Bytes 28-63 |
-      |------------|-------------|-------------|-------------|-------------|-------------|-------------|-------------|
-      | 0          | 0           | 0           | 3           | 3           | 3           | 7           | unspecified |
-
-    * Values array (char array):
-      * Length: 7,  Null count: 0
-      * Null bitmap buffer: Not required
-
-        | Bytes 0-7  | Bytes 8-63            |
-        |------------|-----------------------|
-        | joemark    | unspecified (padding) |
-```
-
-Note that nested types in a sparse union must be internally consistent
-(e.g. see the List in the diagram), i.e. random access at any index j
-on any child array will not cause an error.
-In other words, the array for the nested type must be valid if it is
-reinterpreted as a non-nested array.
-
-Similar to structs, a particular child array may have a non-null slot
-even if the null bitmap of the parent union array indicates the slot is
-null. Additionally, a child array may have a non-null slot even if
-the types array indicates that a slot contains a different type at the index.
-
-## Dictionary encoding
-
-When a field is dictionary encoded, the values are represented by an array of
-Int32 representing the index of the value in the dictionary. The Dictionary is
-received as one or more DictionaryBatches with the id referenced by a
-dictionary attribute defined in the metadata ([Message.fbs][7]) in the Field
-table. The dictionary has the same layout as the type of the field would
-dictate. Each entry in the dictionary can be accessed by its index in the
-DictionaryBatches. When a Schema references a Dictionary id, it must send at
-least one DictionaryBatch for this id.
- -As an example, you could have the following data:
-```
-type: List<String>
-
-[
- ['a', 'b'],
- ['a', 'b'],
- ['a', 'b'],
- ['c', 'd', 'e'],
- ['c', 'd', 'e'],
- ['c', 'd', 'e'],
- ['c', 'd', 'e'],
- ['a', 'b']
-]
-```
-In dictionary-encoded form, this could appear as:
-```
-data List<String> (dictionary-encoded, dictionary id i)
-indices: [0, 0, 0, 1, 1, 1, 1, 0]
-
-dictionary i
-
-type: List<String>
-
-[
- ['a', 'b'],
- ['c', 'd', 'e'],
-]
-```
-
-## References
-
-Apache Drill Documentation - [Value Vectors][6]
-
-[1]: https://en.wikipedia.org/wiki/Bit_numbering
-[2]: https://software.intel.com/en-us/articles/practical-intel-avx-optimization-on-2nd-generation-intel-core-processors
-[3]: https://en.wikipedia.org/wiki/Endianness
-[4]: https://software.intel.com/en-us/node/600110
-[5]: https://parquet.apache.org/documentation/latest/
-[6]: https://drill.apache.org/docs/value-vectors/
-[7]: https://github.com/apache/arrow/blob/master/format/Message.fbs diff --git a/format/Message.fbs b/format/Message.fbs index 830718139d88c..e14fdca8f155c 100644 --- a/format/Message.fbs +++ b/format/Message.fbs @@ -87,7 +87,7 @@ table DictionaryBatch { /// which may include experimental metadata types. For maximum compatibility, /// it is best to send data using RecordBatch union MessageHeader { - Schema, DictionaryBatch, RecordBatch, Tensor + Schema, DictionaryBatch, RecordBatch, Tensor, SparseTensor } table Message { @@ -96,4 +96,4 @@ table Message { bodyLength: long; } -root_type Message; \ No newline at end of file +root_type Message; diff --git a/format/Metadata.md b/format/Metadata.md deleted file mode 100644 index 33d5065f89e23..0000000000000 --- a/format/Metadata.md +++ /dev/null @@ -1,409 +0,0 @@ -
-# Metadata: Logical types, schemas, data headers
-
-This is documentation for the Arrow metadata specification, which enables
-systems to communicate the
-
-* Logical array types (which are implemented using the physical memory layouts
-  specified in [Layout.md][1])
-
-* Schemas for table-like collections of Arrow data structures
-
-* "Data headers" indicating the physical locations of memory buffers sufficient
-  to reconstruct Arrow data structures without copying memory.
-
-## Canonical implementation
-
-We are using [Flatbuffers][2] for low-overhead reading and writing of the Arrow
-metadata. See [Message.fbs][3].
-
-## Schemas
-
-The `Schema` type describes a table-like structure consisting of any number of
-Arrow arrays, each of which can be interpreted as a column in the table. A
-schema by itself does not describe the physical structure of any particular set
-of data.
-
-A schema consists of a sequence of **fields**, which are metadata describing
-the columns. The Flatbuffers IDL for a field is:
-
-```
-table Field {
-  // Name is not required, in i.e. a List
-  name: string;
-  nullable: bool;
-  type: Type;
-
-  // Present only if the field is dictionary encoded
-  dictionary: DictionaryEncoding;
-
-  // children apply only to Nested data types like Struct, List and Union
-  children: [Field];
-
-  // User-defined metadata
-  custom_metadata: [ KeyValue ];
-}
-```
-
-The `type` is the logical type of the field. Nested types, such as List,
-Struct, and Union, have a sequence of child fields.
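For orientation, this is the same information that pyarrow, for example, surfaces through its schema API (the field names below are illustrative):

```python
import pyarrow as pa

# A schema is a sequence of fields: name, logical type, nullability.
schema = pa.schema([
    pa.field("name", pa.string(), nullable=True),
    pa.field("age", pa.int32(), nullable=False),
])
print(schema)
```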
- -A JSON representation of the schema is also provided:
-Field:
-```
-{
-  "name" : "name_of_the_field",
-  "nullable" : false,
-  "type" : /* Type */,
-  "children" : [ /* Field */ ],
-}
-```
-
-Type:
-```
-{
-  "name" : "null|struct|list|union|int|floatingpoint|utf8|binary|fixedsizebinary|bool|decimal|date|time|timestamp|interval"
-  // fields as defined in the Flatbuffer depending on the type name
-}
-```
-
-Union:
-```
-{
-  "name" : "union",
-  "mode" : "Sparse|Dense",
-  "typeIds" : [ /* integer */ ]
-}
-```
-
-The `typeIds` field in the Union are the codes used to denote each type, which
-may be different from the index of the child array. This is so that the union
-type ids do not have to be enumerated from 0.
-
-Int:
-```
-{
-  "name" : "int",
-  "bitWidth" : /* integer */,
-  "isSigned" : /* boolean */
-}
-```
-FloatingPoint:
-```
-{
-  "name" : "floatingpoint",
-  "precision" : "HALF|SINGLE|DOUBLE"
-}
-```
-Decimal:
-```
-{
-  "name" : "decimal",
-  "precision" : /* integer */,
-  "scale" : /* integer */
-}
-```
-
-Timestamp:
-
-```
-{
-  "name" : "timestamp",
-  "unit" : "SECOND|MILLISECOND|MICROSECOND|NANOSECOND"
-}
-```
-
-Date:
-
-```
-{
-  "name" : "date",
-  "unit" : "DAY|MILLISECOND"
-}
-```
-
-Time:
-
-```
-{
-  "name" : "time",
-  "unit" : "SECOND|MILLISECOND|MICROSECOND|NANOSECOND",
-  "bitWidth": /* integer: 32 or 64 */
-}
-```
-
-Interval:
-
-```
-{
-  "name" : "interval",
-  "unit" : "YEAR_MONTH|DAY_TIME"
-}
-```
-Schema:
-```
-{
-  "fields" : [
-    /* Field */
-  ]
-}
-```
-
-## Record data headers
-
-A record batch is a collection of top-level named, equal length Arrow arrays
-(or vectors). If one of the arrays contains nested data, its child arrays are
-not required to be the same length as the top-level arrays.
-
-A record batch can be thought of as a realization of a particular schema. The
-metadata describing a particular record batch is called a "data header". Here is
-the Flatbuffers IDL for a record batch data header
-
-```
-table RecordBatch {
-  length: long;
-  nodes: [FieldNode];
-  buffers: [Buffer];
-}
-```
-
-The `RecordBatch` metadata provides for record batches with length exceeding
-2^31 - 1, but Arrow implementations are not required to implement support
-beyond this size.
-
-The `nodes` and `buffers` fields are produced by a depth-first traversal /
-flattening of a schema (possibly containing nested types) for a given in-memory
-data set.
-
-### Buffers
-
-A buffer is metadata describing a contiguous memory region relative to some
-virtual address space. This may include:
-
-* Shared memory, e.g. a memory-mapped file
-* An RPC message received in-memory
-* Data in a file
-
-The key form of the Buffer type is:
-
-```
-struct Buffer {
-  offset: long;
-  length: long;
-}
-```
-
-In the context of a record batch, each field has some number of buffers
-associated with it, which are derived from their physical memory layout.
-
-Each logical type (separate from its children, if it is a nested type) has a
-deterministic number of buffers associated with it. These will be specified in
-the logical types section.
-
-### Field metadata
-
-The `FieldNode` values contain metadata about each level in a nested type
-hierarchy.
-
-```
-struct FieldNode {
-  /// The number of value slots in the Arrow array at this level of a nested
-  /// tree
-  length: long;
-
-  /// The number of observed nulls.
-  null_count: long;
-}
-```
-
-The `FieldNode` metadata provides for fields with length exceeding 2^31 - 1,
-but Arrow implementations are not required to implement support for large
-arrays.
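As a concrete illustration of a record batch being "a realization of a particular schema", here is how one can be assembled in pyarrow (a sketch; any Arrow implementation produces equivalent metadata):

```python
import pyarrow as pa

batch = pa.RecordBatch.from_arrays(
    [pa.array([1, None, 3]), pa.array(["a", "b", None])],
    ["ints", "strs"],
)
# num_rows plays the role of the RecordBatch `length` field above.
print(batch.num_rows, batch.num_columns)  # 3 2
```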
-
-## Flattening of nested data
-
-Nested types are flattened in the record batch in depth-first order. When
-visiting each field in the nested type tree, the metadata is appended to the
-top-level `nodes` array and the buffers associated with that field (but not
-its children) are appended to the `buffers` array.
-
-For example, let's consider the schema
-
-```
-col1: Struct<a: Int32, b: List<Int64>, c: Float64>
-col2: Utf8
-```
-
-The flattened version of this is:
-
-```
-FieldNode 0: Struct name='col1'
-FieldNode 1: Int32 name='a'
-FieldNode 2: List name='b'
-FieldNode 3: Int64 name='item' # arbitrary
-FieldNode 4: Float64 name='c'
-FieldNode 5: Utf8 name='col2'
-```
-
-For the buffers produced, we would have the following (as described in more
-detail for each type below):
-
-```
-buffer 0: field 0 validity bitmap
-
-buffer 1: field 1 validity bitmap
-buffer 2: field 1 values
-
-buffer 3: field 2 validity bitmap
-buffer 4: field 2 list offsets
-
-buffer 5: field 3 validity bitmap
-buffer 6: field 3 values
-
-buffer 7: field 4 validity bitmap
-buffer 8: field 4 values
-
-buffer 9: field 5 validity bitmap
-buffer 10: field 5 offsets
-buffer 11: field 5 data
-```
-
-## Logical types
-
-A logical type consists of a type name and metadata along with an explicit
-mapping to a physical memory representation. These may fall into some different
-categories:
-
-* Types represented as fixed-width primitive arrays (for example: C-style
-  integers and floating point numbers)
-* Types having equivalent memory layout to a physical nested type (e.g. strings
-  use the list representation, but logically are not nested types)
-
-### Integers
-
-In the first version of Arrow we provide the standard 8-bit through 64-bit
-C integer types, both signed and unsigned:
-
-* Signed types: Int8, Int16, Int32, Int64
-* Unsigned types: UInt8, UInt16, UInt32, UInt64
-
-The IDL looks like:
-
-```
-table Int {
-  bitWidth: int;
-  is_signed: bool;
-}
-```
-
-The integer endianness is currently set globally at the schema level. If a
-schema is set to be little-endian, then all integer types occurring within must
-be little-endian. Integers that are part of other data representations, such as
-list offsets and union types, must have the same endianness as the entire
-record batch.
-
-### Floating point numbers
-
-We provide 3 types of floating point numbers as fixed bit-width primitive
-arrays
-
-- Half precision, 16-bit width
-- Single precision, 32-bit width
-- Double precision, 64-bit width
-
-The IDL looks like:
-
-```
-enum Precision:int {HALF, SINGLE, DOUBLE}
-
-table FloatingPoint {
-  precision: Precision;
-}
-```
-
-### Boolean
-
-The Boolean logical type is represented as a 1-bit wide primitive physical
-type. The bits are numbered using least-significant bit (LSB) ordering.
-
-Like other fixed bit-width primitive types, boolean data appears as 2 buffers
-in the data header (one bitmap for the validity vector and one for the values).
-
-### List
-
-The `List` logical type is the logical (and identically-named) counterpart to
-the List physical type.
-
-In data header form, the list field node contains 2 buffers:
-
-* Validity bitmap
-* List offsets
-
-The buffers associated with a list's child field are handled recursively
-according to the child logical type (e.g. `List<Int32>` vs. `List<Utf8>`).
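The offsets buffer is easiest to see in a worked example. This standalone sketch (illustrative only) decodes a variable-length layout by hand, using the same `offsets[i]:offsets[i+1]` slicing that the `Binary.Value` change later in this diff performs; it reproduces the `['a','b']`, `['c','d','e']`, `['a','b']` slots from the dictionary example above:

```go
package main

import "fmt"

func main() {
	// A list-style layout: child values plus an offsets buffer with
	// len(list)+1 entries; slot i spans [offsets[i], offsets[i+1]).
	values := []string{"a", "b", "c", "d", "e", "a", "b"}
	offsets := []int32{0, 2, 5, 7}

	for i := 0; i+1 < len(offsets); i++ {
		beg, end := offsets[i], offsets[i+1]
		fmt.Printf("slot %d: %v (length %d)\n", i, values[beg:end], end-beg)
	}
}
```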
-
-### Utf8 and Binary
-
-We specify two logical types for variable length bytes:
-
-* `Utf8` data is Unicode values with UTF-8 encoding
-* `Binary` is any other variable length bytes
-
-These types both have the same memory layout as the nested type `List<UInt8>`,
-with the constraint that the inner bytes can contain no null values. From a
-logical type perspective they are primitive, not nested types.
-
-In data header form, while `List<UInt8>` would appear as 2 field nodes (`List`
-and `UInt8`) and 4 buffers (2 for each of the nodes, as per above), these types
-have a simplified representation: a single field node (of `Utf8` or `Binary`
-logical type, which have no children) and 3 buffers:
-
-* Validity bitmap
-* List offsets
-* Byte data
-
-### Decimal
-
-Decimals are represented as a 2's complement 128-bit (16 byte) signed integer
-in little-endian byte order.
-
-### Timestamp
-
-All timestamps are stored as a 64-bit integer, with one of four unit
-resolutions: second, millisecond, microsecond, and nanosecond.
-
-### Date
-
-We support two different date types:
-
-* Days since the UNIX epoch as a 32-bit integer
-* Milliseconds since the UNIX epoch as a 64-bit integer
-
-### Time
-
-Time supports the same unit resolutions: second, millisecond, microsecond, and
-nanosecond. We represent time as the smallest integer accommodating the
-indicated unit. For second and millisecond: 32-bit, for the others 64-bit.
-
-## Dictionary encoding
-
-[1]: https://github.com/apache/arrow/blob/master/format/Layout.md
-[2]: http://github.com/google/flatbuffers
-[3]: https://github.com/apache/arrow/blob/master/format/Message.fbs
diff --git a/format/README.md b/format/README.md
deleted file mode 100644
index c87ac2a00d6ea..0000000000000
--- a/format/README.md
+++ /dev/null
@@ -1,53 +0,0 @@
-
-## Arrow specification documents
-
-Currently, the Arrow specification consists of these pieces:
-
-- Metadata specification (see Metadata.md)
-- Physical memory layout specification (see Layout.md)
-- Logical Types, Schemas, and Record Batch Metadata (see Schema.fbs)
-- Encapsulated Messages (see Message.fbs)
-- Mechanics of messaging between Arrow systems (IPC, RPC, etc.) (see IPC.md)
-- Tensor (Multi-dimensional array) Metadata (see Tensor.fbs)
-
-The metadata currently uses Google's [flatbuffers library][1] for serializing a
-couple of related pieces of information:
-
-- Schemas for tables or record (row) batches. This contains the logical types,
-  field names, and other metadata. Schemas do not contain any information about
-  actual data.
-- *Data headers* for record (row) batches. These must correspond to a known
-  schema, and enable a system to send and receive Arrow row batches in a form
-  that can be precisely disassembled or reconstructed.
-
-## Arrow Format Maturity and Stability
-
-We have made significant progress hardening the Arrow in-memory format and
-Flatbuffer metadata since the project started in February 2016. We have
-integration tests which verify binary compatibility between the Java and C++
-implementations, for example.
-
-Major versions may still include breaking changes to the memory format or
-metadata, so it is recommended to use the same released version of all
-libraries in your applications for maximum compatibility. Data stored in the
-Arrow IPC formats should not be used for long term storage.
-
-[1]: http://github.com/google/flatbuffers
diff --git a/format/README.rst b/format/README.rst
new file mode 100644
index 0000000000000..0eaad49b7e394
--- /dev/null
+++ b/format/README.rst
@@ -0,0 +1,25 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+Arrow Protocol Files
+====================
+
+This folder contains binary protocol definitions for the Arrow columnar format
+and other parts of the project, like the Flight RPC framework.
+
+For documentation about the Arrow format, see the `docs/source/format`
+directory.
diff --git a/format/Tensor.fbs b/format/Tensor.fbs
index 18b614c3bde62..e77b353a0f33f 100644
--- a/format/Tensor.fbs
+++ b/format/Tensor.fbs
@@ -23,6 +23,9 @@ include "Schema.fbs";
 
 namespace org.apache.arrow.flatbuf;
 
+/// ----------------------------------------------------------------------
+/// Data structures for dense tensors
+
 /// Shape data for a single axis in a tensor
 table TensorDim {
   /// Length of dimension
@@ -48,3 +51,96 @@ table Tensor {
 }
 
 root_type Tensor;
+
+/// ----------------------------------------------------------------------
+/// EXPERIMENTAL: Data structures for sparse tensors
+
+/// Coordinate (COO) format of a sparse tensor index.
+table SparseTensorIndexCOO {
+  /// The COO index list is represented as an M x N matrix,
+  /// where M is the number of dimensions of the sparse tensor,
+  /// and N is the number of non-zero values.
+  /// indicesBuffer stores the location and size of this index matrix.
+  /// The type of index value is long, so the stride for the index matrix is unnecessary.
+  ///
+  /// For example, let X be a 2x3x4x5 tensor, and it has the following 6 non-zero values:
+  ///
+  ///   X[0, 1, 2, 0] := 1
+  ///   X[1, 1, 2, 3] := 2
+  ///   X[0, 2, 1, 0] := 3
+  ///   X[0, 1, 3, 0] := 4
+  ///   X[0, 1, 2, 1] := 5
+  ///   X[1, 2, 0, 4] := 6
+  ///
+  /// In COO format, the index matrix of X is the following 4x6 matrix:
+  ///
+  ///   [[0, 0, 0, 0, 1, 1],
+  ///    [1, 1, 1, 2, 1, 2],
+  ///    [2, 2, 3, 1, 2, 0],
+  ///    [0, 1, 0, 0, 3, 4]]
+  ///
+  /// Note that the indices (one coordinate per column) are sorted in lexicographical order.
+  indicesBuffer: Buffer;
+}
+
+/// Compressed Sparse Row format, which is matrix-specific.
+table SparseMatrixIndexCSR {
+  /// indptrBuffer stores the location and size of the indptr array that
+  /// represents the range of the rows.
+  /// The i-th row spans from indptr[i] to indptr[i+1] in the data.
+  /// The length of this array is 1 + (the number of rows), and the type
+  /// of index value is long.
+  ///
+  /// For example, let X be the following 6x4 matrix:
+  ///
+  ///   X := [[0, 1, 2, 0],
+  ///         [0, 0, 3, 0],
+  ///         [0, 4, 0, 5],
+  ///         [0, 0, 0, 0],
+  ///         [6, 0, 7, 8],
+  ///         [0, 9, 0, 0]].
+  ///
+  /// The array of non-zero values in X is:
+  ///
+  ///   values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
+  ///
+  /// And the indptr of X is:
+  ///
+  ///   indptr(X) = [0, 2, 3, 5, 5, 8, 9].
+  indptrBuffer: Buffer;
+
+  /// indicesBuffer stores the location and size of the array that
+  /// contains the column indices of the corresponding non-zero values.
+ /// The type of index value is long. + /// + /// For example, the indices of the above X is: + /// + /// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. + indicesBuffer: Buffer; +} + +union SparseTensorIndex { + SparseTensorIndexCOO, + SparseMatrixIndexCSR +} + +table SparseTensor { + /// The type of data contained in a value cell. + /// Currently only fixed-width value types are supported, + /// no strings or nested types. + type: Type; + + /// The dimensions of the tensor, optionally named. + shape: [TensorDim]; + + /// The number of non-zero values in a sparse tensor. + non_zero_length: long; + + /// Sparse tensor index + sparseIndex: SparseTensorIndex; + + /// The location and size of the tensor's data + data: Buffer; +} + +root_type SparseTensor; diff --git a/go/arrow/array/array.go b/go/arrow/array/array.go index b188dcd68c729..ef37aef42f602 100644 --- a/go/arrow/array/array.go +++ b/go/arrow/array/array.go @@ -180,8 +180,8 @@ func init() { arrow.STRING: func(data *Data) Interface { return NewStringData(data) }, arrow.BINARY: func(data *Data) Interface { return NewBinaryData(data) }, arrow.FIXED_SIZE_BINARY: func(data *Data) Interface { return NewFixedSizeBinaryData(data) }, - arrow.DATE32: unsupportedArrayType, - arrow.DATE64: unsupportedArrayType, + arrow.DATE32: func(data *Data) Interface { return NewDate32Data(data) }, + arrow.DATE64: func(data *Data) Interface { return NewDate64Data(data) }, arrow.TIMESTAMP: func(data *Data) Interface { return NewTimestampData(data) }, arrow.TIME32: func(data *Data) Interface { return NewTime32Data(data) }, arrow.TIME64: func(data *Data) Interface { return NewTime64Data(data) }, diff --git a/go/arrow/array/binary.go b/go/arrow/array/binary.go index 0b89b7e5817cc..850fb09b4a81a 100644 --- a/go/arrow/array/binary.go +++ b/go/arrow/array/binary.go @@ -38,7 +38,13 @@ func NewBinaryData(data *Data) *Binary { } // Value returns the slice at index i. This value should not be mutated. -func (a *Binary) Value(i int) []byte { return a.valueBytes[a.valueOffsets[i]:a.valueOffsets[i+1]] } +func (a *Binary) Value(i int) []byte { + if i < 0 || i >= a.array.data.length { + panic("arrow/array: index out of range") + } + idx := a.array.data.offset + i + return a.valueBytes[a.valueOffsets[idx]:a.valueOffsets[idx+1]] +} // ValueString returns the string at index i without performing additional allocations. // The string is only valid for the lifetime of the Binary array. 
@@ -47,10 +53,32 @@ func (a *Binary) ValueString(i int) string { return *(*string)(unsafe.Pointer(&b)) } -func (a *Binary) ValueOffset(i int) int { return int(a.valueOffsets[i]) } -func (a *Binary) ValueLen(i int) int { return int(a.valueOffsets[i+1] - a.valueOffsets[i]) } -func (a *Binary) ValueOffsets() []int32 { return a.valueOffsets } -func (a *Binary) ValueBytes() []byte { return a.valueBytes } +func (a *Binary) ValueOffset(i int) int { + if i < 0 || i >= a.array.data.length { + panic("arrow/array: index out of range") + } + return int(a.valueOffsets[a.array.data.offset+i]) +} + +func (a *Binary) ValueLen(i int) int { + if i < 0 || i >= a.array.data.length { + panic("arrow/array: index out of range") + } + beg := a.array.data.offset + i + return int(a.valueOffsets[beg+1] - a.valueOffsets[beg]) +} + +func (a *Binary) ValueOffsets() []int32 { + beg := a.array.data.offset + end := beg + a.array.data.length + 1 + return a.valueOffsets[beg:end] +} + +func (a *Binary) ValueBytes() []byte { + beg := a.array.data.offset + end := beg + a.array.data.length + return a.valueBytes[a.valueOffsets[beg]:a.valueOffsets[end]] +} func (a *Binary) setData(data *Data) { if len(data.buffers) != 3 { diff --git a/go/arrow/array/binary_test.go b/go/arrow/array/binary_test.go index 87d1b58c47d14..2af45dee60f76 100644 --- a/go/arrow/array/binary_test.go +++ b/go/arrow/array/binary_test.go @@ -17,6 +17,7 @@ package array import ( + "reflect" "testing" "github.com/stretchr/testify/assert" @@ -62,3 +63,345 @@ func TestBinary(t *testing.T) { b.Release() } + +func TestBinarySliceData(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + values := []string{"a", "bc", "def", "g", "hijk", "lm", "n", "opq", "rs", "tu"} + + b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) + defer b.Release() + + for _, v := range values { + b.AppendString(v) + } + + arr := b.NewArray().(*Binary) + defer arr.Release() + + if got, want := arr.Len(), len(values); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + vs := make([]string, arr.Len()) + + for i := range vs { + vs[i] = arr.ValueString(i) + } + + if got, want := vs, values; !reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } + + tests := []struct { + interval [2]int64 + want []string + }{ + { + interval: [2]int64{0, 0}, + want: []string{}, + }, + { + interval: [2]int64{0, 5}, + want: []string{"a", "bc", "def", "g", "hijk"}, + }, + { + interval: [2]int64{0, 10}, + want: []string{"a", "bc", "def", "g", "hijk", "lm", "n", "opq", "rs", "tu"}, + }, + { + interval: [2]int64{5, 10}, + want: []string{"lm", "n", "opq", "rs", "tu"}, + }, + { + interval: [2]int64{10, 10}, + want: []string{}, + }, + { + interval: [2]int64{2, 7}, + want: []string{"def", "g", "hijk", "lm", "n"}, + }, + } + + for _, tc := range tests { + t.Run("", func(t *testing.T) { + + slice := NewSlice(arr, tc.interval[0], tc.interval[1]).(*Binary) + defer slice.Release() + + if got, want := slice.Len(), len(tc.want); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + vs := make([]string, slice.Len()) + + for i := range vs { + vs[i] = slice.ValueString(i) + } + + if got, want := vs, tc.want; !reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } + }) + } +} + +func TestBinarySliceDataWithNull(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} + valids := []bool{true, 
true, false, false, true, true, true, true, false, true} + + b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) + defer b.Release() + + b.AppendStringValues(values, valids) + + arr := b.NewArray().(*Binary) + defer arr.Release() + + if got, want := arr.Len(), len(values); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + if got, want := arr.NullN(), 3; got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + vs := make([]string, arr.Len()) + + for i := range vs { + vs[i] = arr.ValueString(i) + } + + if got, want := vs, values; !reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } + + tests := []struct { + interval [2]int64 + nulls int + want []string + }{ + { + interval: [2]int64{0, 2}, + nulls: 0, + want: []string{"a", "bc"}, + }, + { + interval: [2]int64{0, 3}, + nulls: 1, + want: []string{"a", "bc", ""}, + }, + { + interval: [2]int64{0, 4}, + nulls: 2, + want: []string{"a", "bc", "", ""}, + }, + { + interval: [2]int64{4, 8}, + nulls: 0, + want: []string{"hijk", "lm", "", "opq"}, + }, + { + interval: [2]int64{2, 9}, + nulls: 3, + want: []string{"", "", "hijk", "lm", "", "opq", ""}, + }, + } + + for _, tc := range tests { + t.Run("", func(t *testing.T) { + + slice := NewSlice(arr, tc.interval[0], tc.interval[1]).(*Binary) + defer slice.Release() + + if got, want := slice.Len(), len(tc.want); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + if got, want := slice.NullN(), tc.nulls; got != want { + t.Errorf("got=%d, want=%d", got, want) + } + + vs := make([]string, slice.Len()) + + for i := range vs { + vs[i] = slice.ValueString(i) + } + + if got, want := vs, tc.want; !reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } + }) + } +} + +func TestBinarySliceOutOfBounds(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + values := []string{"a", "bc", "def", "g", "hijk", "lm", "n", "opq", "rs", "tu"} + + b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) + defer b.Release() + + for _, v := range values { + b.AppendString(v) + } + + arr := b.NewArray().(*Binary) + defer arr.Release() + + slice := NewSlice(arr, 3, 8).(*Binary) + defer slice.Release() + + tests := []struct { + index int + panic bool + }{ + { + index: -1, + panic: true, + }, + { + index: 5, + panic: true, + }, + { + index: 0, + panic: false, + }, + { + index: 4, + panic: false, + }, + } + + for _, tc := range tests { + t.Run("", func(t *testing.T) { + + var val string + + if tc.panic { + defer func() { + e := recover() + if e == nil { + t.Fatalf("this should have panicked, but did not; slice value %q", val) + } + if got, want := e.(string), "arrow/array: index out of range"; got != want { + t.Fatalf("invalid error. 
got=%q, want=%q", got, want) + } + }() + } else { + defer func() { + if e := recover(); e != nil { + t.Fatalf("unexpected panic: %v", e) + } + }() + } + + val = slice.ValueString(tc.index) + }) + } +} + +func TestBinaryValueOffset(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} + valids := []bool{true, true, false, false, true, true, true, true, false, true} + + b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) + defer b.Release() + + b.AppendStringValues(values, valids) + + arr := b.NewArray().(*Binary) + defer arr.Release() + + slice := NewSlice(arr, 2, 9).(*Binary) + defer slice.Release() + + offset := 3 + vs := values[2:9] + + for i, v := range vs { + assert.Equal(t, offset, slice.ValueOffset(i)) + offset += len(v) + } +} + +func TestBinaryValueLen(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} + valids := []bool{true, true, false, false, true, true, true, true, false, true} + + b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) + defer b.Release() + + b.AppendStringValues(values, valids) + + arr := b.NewArray().(*Binary) + defer arr.Release() + + slice := NewSlice(arr, 2, 9).(*Binary) + defer slice.Release() + + vs := values[2:9] + + for i, v := range vs { + assert.Equal(t, len(v), slice.ValueLen(i)) + } +} + +func TestBinaryValueOffsets(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} + valids := []bool{true, true, false, false, true, true, true, true, false, true} + + b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) + defer b.Release() + + b.AppendStringValues(values, valids) + + arr := b.NewArray().(*Binary) + defer arr.Release() + + assert.Equal(t, []int32{0, 1, 3, 3, 3, 7, 9, 9, 12, 12, 14}, arr.ValueOffsets()) + + slice := NewSlice(arr, 2, 9).(*Binary) + defer slice.Release() + + assert.Equal(t, []int32{3, 3, 3, 7, 9, 9, 12, 12}, slice.ValueOffsets()) +} + +func TestBinaryValueBytes(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} + valids := []bool{true, true, false, false, true, true, true, true, false, true} + + b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) + defer b.Release() + + b.AppendStringValues(values, valids) + + arr := b.NewArray().(*Binary) + defer arr.Release() + + assert.Equal(t, []byte{'a', 'b', 'c', 'h', 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'q', 't', 'u'}, arr.ValueBytes()) + + slice := NewSlice(arr, 2, 9).(*Binary) + defer slice.Release() + + assert.Equal(t, []byte{'h', 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'q'}, slice.ValueBytes()) +} diff --git a/go/arrow/array/boolean.go b/go/arrow/array/boolean.go index 19a692345e357..68de951e0ce8c 100644 --- a/go/arrow/array/boolean.go +++ b/go/arrow/array/boolean.go @@ -45,7 +45,12 @@ func NewBooleanData(data *Data) *Boolean { return a } -func (a *Boolean) Value(i int) bool { return bitutil.BitIsSet(a.values, i) } +func (a *Boolean) Value(i int) bool { + if i < 0 || i >= a.array.data.length { + panic("arrow/array: index out of range") + } + return bitutil.BitIsSet(a.values, a.array.data.offset+i) +} func (a *Boolean) String() string { o := new(strings.Builder) diff --git 
a/go/arrow/array/boolean_test.go b/go/arrow/array/boolean_test.go new file mode 100644 index 0000000000000..e6f4b9bf2bc51 --- /dev/null +++ b/go/arrow/array/boolean_test.go @@ -0,0 +1,260 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package array_test + +import ( + "reflect" + "testing" + + "github.com/apache/arrow/go/arrow/array" + "github.com/apache/arrow/go/arrow/memory" +) + +func TestBooleanSliceData(t *testing.T) { + pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer pool.AssertSize(t, 0) + + values := []bool{true, false, true, true, true, true, true, false, true, false} + + b := array.NewBooleanBuilder(pool) + defer b.Release() + + for _, v := range values { + b.Append(v) + } + + arr := b.NewArray().(*array.Boolean) + defer arr.Release() + + if got, want := arr.Len(), len(values); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + vs := make([]bool, arr.Len()) + + for i := range vs { + vs[i] = arr.Value(i) + } + + if got, want := vs, values; !reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } + + tests := []struct { + interval [2]int64 + want []bool + }{ + { + interval: [2]int64{0, 0}, + want: []bool{}, + }, + { + interval: [2]int64{10, 10}, + want: []bool{}, + }, + { + interval: [2]int64{0, 5}, + want: []bool{true, false, true, true, true}, + }, + { + interval: [2]int64{5, 10}, + want: []bool{true, true, false, true, false}, + }, + { + interval: [2]int64{2, 7}, + want: []bool{true, true, true, true, true}, + }, + } + + for _, tc := range tests { + t.Run("", func(t *testing.T) { + + slice := array.NewSlice(arr, tc.interval[0], tc.interval[1]).(*array.Boolean) + defer slice.Release() + + if got, want := slice.Len(), len(tc.want); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + vs := make([]bool, slice.Len()) + + for i := range vs { + vs[i] = slice.Value(i) + } + + if got, want := vs, tc.want; !reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } + }) + } +} + +func TestBooleanSliceDataWithNull(t *testing.T) { + pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer pool.AssertSize(t, 0) + + values := []bool{true, false, true, false, false, false, true, false, true, false} + valids := []bool{true, false, true, true, true, true, true, false, true, true} + + b := array.NewBooleanBuilder(pool) + defer b.Release() + + b.AppendValues(values, valids) + + arr := b.NewArray().(*array.Boolean) + defer arr.Release() + + if got, want := arr.Len(), len(valids); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + if got, want := arr.NullN(), 2; got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + vs := make([]bool, arr.Len()) + + for i := range vs { + vs[i] = arr.Value(i) + } + + if got, want := vs, values; 
!reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } + + tests := []struct { + interval [2]int64 + nulls int + want []bool + }{ + { + interval: [2]int64{2, 9}, + nulls: 1, + want: []bool{true, false, false, false, true, false, true}, + }, + { + interval: [2]int64{0, 7}, + nulls: 1, + want: []bool{true, false, true, false, false, false, true}, + }, + { + interval: [2]int64{1, 8}, + nulls: 2, + want: []bool{false, true, false, false, false, true, false}, + }, + { + interval: [2]int64{2, 7}, + nulls: 0, + want: []bool{true, false, false, false, true}, + }, + } + + for _, tc := range tests { + t.Run("", func(t *testing.T) { + + slice := array.NewSlice(arr, tc.interval[0], tc.interval[1]).(*array.Boolean) + defer slice.Release() + + if got, want := slice.NullN(), tc.nulls; got != want { + t.Errorf("got=%d, want=%d", got, want) + } + + if got, want := slice.Len(), len(tc.want); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + vs := make([]bool, slice.Len()) + + for i := range vs { + vs[i] = slice.Value(i) + } + + if got, want := vs, tc.want; !reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } + }) + } +} + +func TestBooleanSliceOutOfBounds(t *testing.T) { + pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer pool.AssertSize(t, 0) + + values := []bool{true, false, true, false, true, false, true, false, true, false} + + b := array.NewBooleanBuilder(pool) + defer b.Release() + + for _, v := range values { + b.Append(v) + } + + arr := b.NewArray().(*array.Boolean) + defer arr.Release() + + slice := array.NewSlice(arr, 3, 8).(*array.Boolean) + defer slice.Release() + + tests := []struct { + index int + panic bool + }{ + { + index: -1, + panic: true, + }, + { + index: 5, + panic: true, + }, + { + index: 0, + panic: false, + }, + { + index: 4, + panic: false, + }, + } + + for _, tc := range tests { + t.Run("", func(t *testing.T) { + + var val bool + + if tc.panic { + defer func() { + e := recover() + if e == nil { + t.Fatalf("this should have panicked, but did not; slice value %v", val) + } + if got, want := e.(string), "arrow/array: index out of range"; got != want { + t.Fatalf("invalid error. got=%q, want=%q", got, want) + } + }() + } else { + defer func() { + if e := recover(); e != nil { + t.Fatalf("unexpected panic: %v", e) + } + }() + } + + val = slice.Value(tc.index) + }) + } +} diff --git a/go/arrow/array/numeric.gen.go b/go/arrow/array/numeric.gen.go index 1f734c05127b4..1fb8257d940c4 100644 --- a/go/arrow/array/numeric.gen.go +++ b/go/arrow/array/numeric.gen.go @@ -609,3 +609,93 @@ func (a *Time64) setData(data *Data) { a.values = a.values[beg:end] } } + +// A type which represents an immutable sequence of arrow.Date32 values. 
+type Date32 struct { + array + values []arrow.Date32 +} + +func NewDate32Data(data *Data) *Date32 { + a := &Date32{} + a.refCount = 1 + a.setData(data) + return a +} + +func (a *Date32) Value(i int) arrow.Date32 { return a.values[i] } +func (a *Date32) Date32Values() []arrow.Date32 { return a.values } + +func (a *Date32) String() string { + o := new(strings.Builder) + o.WriteString("[") + for i, v := range a.values { + if i > 0 { + fmt.Fprintf(o, " ") + } + switch { + case a.IsNull(i): + o.WriteString("(null)") + default: + fmt.Fprintf(o, "%v", v) + } + } + o.WriteString("]") + return o.String() +} + +func (a *Date32) setData(data *Data) { + a.array.setData(data) + vals := data.buffers[1] + if vals != nil { + a.values = arrow.Date32Traits.CastFromBytes(vals.Bytes()) + beg := a.array.data.offset + end := beg + a.array.data.length + a.values = a.values[beg:end] + } +} + +// A type which represents an immutable sequence of arrow.Date64 values. +type Date64 struct { + array + values []arrow.Date64 +} + +func NewDate64Data(data *Data) *Date64 { + a := &Date64{} + a.refCount = 1 + a.setData(data) + return a +} + +func (a *Date64) Value(i int) arrow.Date64 { return a.values[i] } +func (a *Date64) Date64Values() []arrow.Date64 { return a.values } + +func (a *Date64) String() string { + o := new(strings.Builder) + o.WriteString("[") + for i, v := range a.values { + if i > 0 { + fmt.Fprintf(o, " ") + } + switch { + case a.IsNull(i): + o.WriteString("(null)") + default: + fmt.Fprintf(o, "%v", v) + } + } + o.WriteString("]") + return o.String() +} + +func (a *Date64) setData(data *Data) { + a.array.setData(data) + vals := data.buffers[1] + if vals != nil { + a.values = arrow.Date64Traits.CastFromBytes(vals.Bytes()) + beg := a.array.data.offset + end := beg + a.array.data.length + a.values = a.values[beg:end] + } +} diff --git a/go/arrow/array/numeric_test.go b/go/arrow/array/numeric_test.go index 9e8267a70de6c..fc7f04addbe0d 100644 --- a/go/arrow/array/numeric_test.go +++ b/go/arrow/array/numeric_test.go @@ -394,3 +394,223 @@ func TestTime64SliceDataWithNull(t *testing.T) { t.Fatalf("got=%v, want=%v", got, want) } } + +func TestNewDate32Data(t *testing.T) { + exp := []arrow.Date32{1, 2, 4, 8, 16} + + dtype := &arrow.Date32Type{} + ad := array.NewData( + dtype, len(exp), + []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Date32Traits.CastToBytes(exp))}, + nil, 0, 0, + ) + fa := array.NewDate32Data(ad) + + assert.Equal(t, len(exp), fa.Len(), "unexpected Len()") + assert.Equal(t, exp, fa.Date32Values(), "unexpected Date32Values()") +} + +func TestDate32SliceData(t *testing.T) { + pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer pool.AssertSize(t, 0) + + const ( + beg = 2 + end = 4 + ) + + var ( + vs = []arrow.Date32{1, 2, 3, 4, 5} + sub = vs[beg:end] + ) + + b := array.NewDate32Builder(pool) + defer b.Release() + + for _, v := range vs { + b.Append(v) + } + + arr := b.NewArray().(*array.Date32) + defer arr.Release() + + if got, want := arr.Len(), len(vs); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + if got, want := arr.Date32Values(), vs; !reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } + + slice := array.NewSlice(arr, beg, end).(*array.Date32) + defer slice.Release() + + if got, want := slice.Len(), len(sub); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + if got, want := slice.Date32Values(), sub; !reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } +} + +func TestDate32SliceDataWithNull(t 
*testing.T) { + pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer pool.AssertSize(t, 0) + + const ( + beg = 2 + end = 5 + ) + + var ( + valids = []bool{true, true, true, false, true, true} + vs = []arrow.Date32{1, 2, 3, 0, 4, 5} + sub = vs[beg:end] + ) + + b := array.NewDate32Builder(pool) + defer b.Release() + + b.AppendValues(vs, valids) + + arr := b.NewArray().(*array.Date32) + defer arr.Release() + + if got, want := arr.Len(), len(valids); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + if got, want := arr.NullN(), 1; got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + if got, want := arr.Date32Values(), vs; !reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } + + slice := array.NewSlice(arr, beg, end).(*array.Date32) + defer slice.Release() + + if got, want := slice.NullN(), 1; got != want { + t.Errorf("got=%d, want=%d", got, want) + } + + if got, want := slice.Len(), len(sub); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + if got, want := slice.Date32Values(), sub; !reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } +} + +func TestNewDate64Data(t *testing.T) { + exp := []arrow.Date64{1, 2, 4, 8, 16} + + dtype := &arrow.Date64Type{} + ad := array.NewData( + dtype, len(exp), + []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Date64Traits.CastToBytes(exp))}, + nil, 0, 0, + ) + fa := array.NewDate64Data(ad) + + assert.Equal(t, len(exp), fa.Len(), "unexpected Len()") + assert.Equal(t, exp, fa.Date64Values(), "unexpected Date64Values()") +} + +func TestDate64SliceData(t *testing.T) { + pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer pool.AssertSize(t, 0) + + const ( + beg = 2 + end = 4 + ) + + var ( + vs = []arrow.Date64{1, 2, 3, 4, 5} + sub = vs[beg:end] + ) + + b := array.NewDate64Builder(pool) + defer b.Release() + + for _, v := range vs { + b.Append(v) + } + + arr := b.NewArray().(*array.Date64) + defer arr.Release() + + if got, want := arr.Len(), len(vs); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + if got, want := arr.Date64Values(), vs; !reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } + + slice := array.NewSlice(arr, beg, end).(*array.Date64) + defer slice.Release() + + if got, want := slice.Len(), len(sub); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + if got, want := slice.Date64Values(), sub; !reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } +} + +func TestDate64SliceDataWithNull(t *testing.T) { + pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer pool.AssertSize(t, 0) + + const ( + beg = 2 + end = 5 + ) + + var ( + valids = []bool{true, true, true, false, true, true} + vs = []arrow.Date64{1, 2, 3, 0, 4, 5} + sub = vs[beg:end] + ) + + b := array.NewDate64Builder(pool) + defer b.Release() + + b.AppendValues(vs, valids) + + arr := b.NewArray().(*array.Date64) + defer arr.Release() + + if got, want := arr.Len(), len(valids); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + if got, want := arr.NullN(), 1; got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + if got, want := arr.Date64Values(), vs; !reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } + + slice := array.NewSlice(arr, beg, end).(*array.Date64) + defer slice.Release() + + if got, want := slice.NullN(), 1; got != want { + t.Errorf("got=%d, want=%d", got, want) + } + + if got, want := slice.Len(), len(sub); got != want { + t.Fatalf("got=%d, 
want=%d", got, want)
+	}
+
+	if got, want := slice.Date64Values(), sub; !reflect.DeepEqual(got, want) {
+		t.Fatalf("got=%v, want=%v", got, want)
+	}
+}
diff --git a/go/arrow/array/numericbuilder.gen.go b/go/arrow/array/numericbuilder.gen.go
index 3a7dc167f15aa..946c5ba74aaeb 100644
--- a/go/arrow/array/numericbuilder.gen.go
+++ b/go/arrow/array/numericbuilder.gen.go
@@ -1772,6 +1772,274 @@ func (b *Time64Builder) newData() (data *Data) {
 	return
 }
 
+type Date32Builder struct {
+	builder
+
+	data    *memory.Buffer
+	rawData []arrow.Date32
+}
+
+func NewDate32Builder(mem memory.Allocator) *Date32Builder {
+	return &Date32Builder{builder: builder{refCount: 1, mem: mem}}
+}
+
+// Release decreases the reference count by 1.
+// When the reference count goes to zero, the memory is freed.
+func (b *Date32Builder) Release() {
+	debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")
+
+	if atomic.AddInt64(&b.refCount, -1) == 0 {
+		if b.nullBitmap != nil {
+			b.nullBitmap.Release()
+			b.nullBitmap = nil
+		}
+		if b.data != nil {
+			b.data.Release()
+			b.data = nil
+			b.rawData = nil
+		}
+	}
+}
+
+func (b *Date32Builder) Append(v arrow.Date32) {
+	b.Reserve(1)
+	b.UnsafeAppend(v)
+}
+
+func (b *Date32Builder) AppendNull() {
+	b.Reserve(1)
+	b.UnsafeAppendBoolToBitmap(false)
+}
+
+func (b *Date32Builder) UnsafeAppend(v arrow.Date32) {
+	bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
+	b.rawData[b.length] = v
+	b.length++
+}
+
+func (b *Date32Builder) UnsafeAppendBoolToBitmap(isValid bool) {
+	if isValid {
+		bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
+	} else {
+		b.nulls++
+	}
+	b.length++
+}
+
+// AppendValues will append the values in the v slice. The valid slice determines which values
+// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty,
+// all values in v are appended and considered valid.
+func (b *Date32Builder) AppendValues(v []arrow.Date32, valid []bool) {
+	if len(v) != len(valid) && len(valid) != 0 {
+		panic("len(v) != len(valid) && len(valid) != 0")
+	}
+
+	b.Reserve(len(v))
+	if len(v) > 0 {
+		arrow.Date32Traits.Copy(b.rawData[b.length:], v)
+	}
+	b.builder.unsafeAppendBoolsToBitmap(valid, len(v))
+}
+
+func (b *Date32Builder) init(capacity int) {
+	b.builder.init(capacity)
+
+	b.data = memory.NewResizableBuffer(b.mem)
+	bytesN := arrow.Date32Traits.BytesRequired(capacity)
+	b.data.Resize(bytesN)
+	b.rawData = arrow.Date32Traits.CastFromBytes(b.data.Bytes())
+}
+
+// Reserve ensures there is enough space for appending n elements
+// by checking the capacity and calling Resize if necessary.
+func (b *Date32Builder) Reserve(n int) {
+	b.builder.reserve(n, b.Resize)
+}
+
+// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
+// additional memory will be allocated. If n is smaller, the allocated memory may be reduced.
+func (b *Date32Builder) Resize(n int) {
+	nBuilder := n
+	if n < minBuilderCapacity {
+		n = minBuilderCapacity
+	}
+
+	if b.capacity == 0 {
+		b.init(n)
+	} else {
+		b.builder.resize(nBuilder, b.init)
+		b.data.Resize(arrow.Date32Traits.BytesRequired(n))
+		b.rawData = arrow.Date32Traits.CastFromBytes(b.data.Bytes())
+	}
+}
+
+// NewArray creates a Date32 array from the memory buffers used by the builder and resets the Date32Builder
+// so it can be used to build a new array.
+func (b *Date32Builder) NewArray() Interface {
+	return b.NewDate32Array()
+}
+
+// NewDate32Array creates a Date32 array from the memory buffers used by the builder and resets the Date32Builder
+// so it can be used to build a new array.
+func (b *Date32Builder) NewDate32Array() (a *Date32) {
+	data := b.newData()
+	a = NewDate32Data(data)
+	data.Release()
+	return
+}
+
+func (b *Date32Builder) newData() (data *Data) {
+	bytesRequired := arrow.Date32Traits.BytesRequired(b.length)
+	if bytesRequired > 0 && bytesRequired < b.data.Len() {
+		// trim buffers
+		b.data.Resize(bytesRequired)
+	}
+	data = NewData(arrow.PrimitiveTypes.Date32, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0)
+	b.reset()
+
+	if b.data != nil {
+		b.data.Release()
+		b.data = nil
+		b.rawData = nil
+	}
+
+	return
+}
+
+type Date64Builder struct {
+	builder
+
+	data    *memory.Buffer
+	rawData []arrow.Date64
+}
+
+func NewDate64Builder(mem memory.Allocator) *Date64Builder {
+	return &Date64Builder{builder: builder{refCount: 1, mem: mem}}
+}
+
+// Release decreases the reference count by 1.
+// When the reference count goes to zero, the memory is freed.
+func (b *Date64Builder) Release() {
+	debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")
+
+	if atomic.AddInt64(&b.refCount, -1) == 0 {
+		if b.nullBitmap != nil {
+			b.nullBitmap.Release()
+			b.nullBitmap = nil
+		}
+		if b.data != nil {
+			b.data.Release()
+			b.data = nil
+			b.rawData = nil
+		}
+	}
+}
+
+func (b *Date64Builder) Append(v arrow.Date64) {
+	b.Reserve(1)
+	b.UnsafeAppend(v)
+}
+
+func (b *Date64Builder) AppendNull() {
+	b.Reserve(1)
+	b.UnsafeAppendBoolToBitmap(false)
+}
+
+func (b *Date64Builder) UnsafeAppend(v arrow.Date64) {
+	bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
+	b.rawData[b.length] = v
+	b.length++
+}
+
+func (b *Date64Builder) UnsafeAppendBoolToBitmap(isValid bool) {
+	if isValid {
+		bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
+	} else {
+		b.nulls++
+	}
+	b.length++
+}
+
+// AppendValues will append the values in the v slice. The valid slice determines which values
+// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty,
+// all values in v are appended and considered valid.
+func (b *Date64Builder) AppendValues(v []arrow.Date64, valid []bool) {
+	if len(v) != len(valid) && len(valid) != 0 {
+		panic("len(v) != len(valid) && len(valid) != 0")
+	}
+
+	b.Reserve(len(v))
+	if len(v) > 0 {
+		arrow.Date64Traits.Copy(b.rawData[b.length:], v)
+	}
+	b.builder.unsafeAppendBoolsToBitmap(valid, len(v))
+}
+
+func (b *Date64Builder) init(capacity int) {
+	b.builder.init(capacity)
+
+	b.data = memory.NewResizableBuffer(b.mem)
+	bytesN := arrow.Date64Traits.BytesRequired(capacity)
+	b.data.Resize(bytesN)
+	b.rawData = arrow.Date64Traits.CastFromBytes(b.data.Bytes())
+}
+
+// Reserve ensures there is enough space for appending n elements
+// by checking the capacity and calling Resize if necessary.
+func (b *Date64Builder) Reserve(n int) {
+	b.builder.reserve(n, b.Resize)
+}
+
+// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
+// additional memory will be allocated. If n is smaller, the allocated memory may be reduced.
+func (b *Date64Builder) Resize(n int) { + nBuilder := n + if n < minBuilderCapacity { + n = minBuilderCapacity + } + + if b.capacity == 0 { + b.init(n) + } else { + b.builder.resize(nBuilder, b.init) + b.data.Resize(arrow.Date64Traits.BytesRequired(n)) + b.rawData = arrow.Date64Traits.CastFromBytes(b.data.Bytes()) + } +} + +// NewArray creates a Date64 array from the memory buffers used by the builder and resets the Date64Builder +// so it can be used to build a new array. +func (b *Date64Builder) NewArray() Interface { + return b.NewDate64Array() +} + +// NewDate64Array creates a Date64 array from the memory buffers used by the builder and resets the Date64Builder +// so it can be used to build a new array. +func (b *Date64Builder) NewDate64Array() (a *Date64) { + data := b.newData() + a = NewDate64Data(data) + data.Release() + return +} + +func (b *Date64Builder) newData() (data *Data) { + bytesRequired := arrow.Date64Traits.BytesRequired(b.length) + if bytesRequired > 0 && bytesRequired < b.data.Len() { + // trim buffers + b.data.Resize(bytesRequired) + } + data = NewData(arrow.PrimitiveTypes.Date64, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) + b.reset() + + if b.data != nil { + b.data.Release() + b.data = nil + b.rawData = nil + } + + return +} + var ( _ Builder = (*Int64Builder)(nil) _ Builder = (*Uint64Builder)(nil) @@ -1786,4 +2054,6 @@ var ( _ Builder = (*TimestampBuilder)(nil) _ Builder = (*Time32Builder)(nil) _ Builder = (*Time64Builder)(nil) + _ Builder = (*Date32Builder)(nil) + _ Builder = (*Date64Builder)(nil) ) diff --git a/go/arrow/array/numericbuilder_test.go b/go/arrow/array/numericbuilder_test.go index 65f3c86c2ea35..3bb49a3af7310 100644 --- a/go/arrow/array/numericbuilder_test.go +++ b/go/arrow/array/numericbuilder_test.go @@ -362,3 +362,223 @@ func TestTime64Builder_Resize(t *testing.T) { ab.Release() } + +func TestNewDate32Builder(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + ab := array.NewDate32Builder(mem) + + ab.Append(1) + ab.Append(2) + ab.Append(3) + ab.AppendNull() + ab.Append(5) + ab.Append(6) + ab.AppendNull() + ab.Append(8) + ab.Append(9) + ab.Append(10) + + // check state of builder before NewDate32Array + assert.Equal(t, 10, ab.Len(), "unexpected Len()") + assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") + + a := ab.NewDate32Array() + + // check state of builder after NewDate32Array + assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewDate32Array did not reset state") + assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewDate32Array did not reset state") + assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewDate32Array did not reset state") + + // check state of array + assert.Equal(t, 2, a.NullN(), "unexpected null count") + assert.Equal(t, []arrow.Date32{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Date32Values(), "unexpected Date32Values") + assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity + assert.Len(t, a.Date32Values(), 10, "unexpected length of Date32Values") + + a.Release() + + ab.Append(7) + ab.Append(8) + + a = ab.NewDate32Array() + + assert.Equal(t, 0, a.NullN()) + assert.Equal(t, []arrow.Date32{7, 8}, a.Date32Values()) + assert.Len(t, a.Date32Values(), 2) + + a.Release() +} + +func TestDate32Builder_AppendValues(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + ab := array.NewDate32Builder(mem) + + exp := []arrow.Date32{1, 2, 3, 4} + 
ab.AppendValues(exp, nil) + a := ab.NewDate32Array() + assert.Equal(t, exp, a.Date32Values()) + + a.Release() + ab.Release() +} + +func TestDate32Builder_Empty(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + ab := array.NewDate32Builder(mem) + + exp := []arrow.Date32{1, 2, 3, 4} + ab.AppendValues(exp, nil) + a := ab.NewDate32Array() + assert.Equal(t, exp, a.Date32Values()) + a.Release() + + a = ab.NewDate32Array() + assert.Zero(t, a.Len()) + a.Release() + + ab.Release() +} + +func TestDate32Builder_Resize(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + ab := array.NewDate32Builder(mem) + + assert.Equal(t, 0, ab.Cap()) + assert.Equal(t, 0, ab.Len()) + + ab.Reserve(63) + assert.Equal(t, 64, ab.Cap()) + assert.Equal(t, 0, ab.Len()) + + for i := 0; i < 63; i++ { + ab.Append(0) + } + assert.Equal(t, 64, ab.Cap()) + assert.Equal(t, 63, ab.Len()) + + ab.Resize(5) + assert.Equal(t, 5, ab.Len()) + + ab.Resize(32) + assert.Equal(t, 5, ab.Len()) + + ab.Release() +} + +func TestNewDate64Builder(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + ab := array.NewDate64Builder(mem) + + ab.Append(1) + ab.Append(2) + ab.Append(3) + ab.AppendNull() + ab.Append(5) + ab.Append(6) + ab.AppendNull() + ab.Append(8) + ab.Append(9) + ab.Append(10) + + // check state of builder before NewDate64Array + assert.Equal(t, 10, ab.Len(), "unexpected Len()") + assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") + + a := ab.NewDate64Array() + + // check state of builder after NewDate64Array + assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewDate64Array did not reset state") + assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewDate64Array did not reset state") + assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewDate64Array did not reset state") + + // check state of array + assert.Equal(t, 2, a.NullN(), "unexpected null count") + assert.Equal(t, []arrow.Date64{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Date64Values(), "unexpected Date64Values") + assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity + assert.Len(t, a.Date64Values(), 10, "unexpected length of Date64Values") + + a.Release() + + ab.Append(7) + ab.Append(8) + + a = ab.NewDate64Array() + + assert.Equal(t, 0, a.NullN()) + assert.Equal(t, []arrow.Date64{7, 8}, a.Date64Values()) + assert.Len(t, a.Date64Values(), 2) + + a.Release() +} + +func TestDate64Builder_AppendValues(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + ab := array.NewDate64Builder(mem) + + exp := []arrow.Date64{1, 2, 3, 4} + ab.AppendValues(exp, nil) + a := ab.NewDate64Array() + assert.Equal(t, exp, a.Date64Values()) + + a.Release() + ab.Release() +} + +func TestDate64Builder_Empty(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + ab := array.NewDate64Builder(mem) + + exp := []arrow.Date64{1, 2, 3, 4} + ab.AppendValues(exp, nil) + a := ab.NewDate64Array() + assert.Equal(t, exp, a.Date64Values()) + a.Release() + + a = ab.NewDate64Array() + assert.Zero(t, a.Len()) + a.Release() + + ab.Release() +} + +func TestDate64Builder_Resize(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + ab := array.NewDate64Builder(mem) + + assert.Equal(t, 0, ab.Cap()) + assert.Equal(t, 0, ab.Len()) + + 
ab.Reserve(63) + assert.Equal(t, 64, ab.Cap()) + assert.Equal(t, 0, ab.Len()) + + for i := 0; i < 63; i++ { + ab.Append(0) + } + assert.Equal(t, 64, ab.Cap()) + assert.Equal(t, 63, ab.Len()) + + ab.Resize(5) + assert.Equal(t, 5, ab.Len()) + + ab.Resize(32) + assert.Equal(t, 5, ab.Len()) + + ab.Release() +} diff --git a/go/arrow/csv/csv.go b/go/arrow/csv/csv.go index 36f3abd6230de..022c46d8ece74 100644 --- a/go/arrow/csv/csv.go +++ b/go/arrow/csv/csv.go @@ -17,8 +17,6 @@ // Package csv reads CSV files and presents the extracted data as records. package csv -// TODO: implement a row chunker to accumulate N rows into the current record. - import ( "encoding/csv" "errors" @@ -61,6 +59,19 @@ func WithAllocator(mem memory.Allocator) Option { } } +// WithChunk specifies the chunk size used while parsing CSV files. +// +// If n is zero or 1, no chunking will take place and the reader will create +// one record per row. +// If n is greater than 1, chunks of n rows will be read. +// If n is negative, the reader will load the whole CSV file into memory and +// create one big record with all the rows. +func WithChunk(n int) Option { + return func(r *Reader) { + r.chunk = n + } +} + // Reader wraps encoding/csv.Reader and creates array.Records from a schema. type Reader struct { r *csv.Reader @@ -71,6 +82,10 @@ type Reader struct { cur array.Record err error + chunk int + done bool + next func() bool + mem memory.Allocator } @@ -82,7 +97,8 @@ type Reader struct { func NewReader(r io.Reader, schema *arrow.Schema, opts ...Option) *Reader { validate(schema) - rr := &Reader{r: csv.NewReader(r), schema: schema, refs: 1} + rr := &Reader{r: csv.NewReader(r), schema: schema, refs: 1, chunk: 1} + rr.r.ReuseRecord = true for _, opt := range opts { opt(rr) } @@ -93,6 +109,14 @@ func NewReader(r io.Reader, schema *arrow.Schema, opts ...Option) *Reader { rr.bld = array.NewRecordBuilder(rr.mem, rr.schema) + switch { + case rr.chunk < 0: + rr.next = rr.nextall + case rr.chunk > 1: + rr.next = rr.nextn + default: + rr.next = rr.next1 + } return rr } @@ -117,13 +141,20 @@ func (r *Reader) Next() bool { r.cur = nil } - if r.err != nil { + if r.err != nil || r.done { return false } + return r.next() +} + +// next1 reads one row from the CSV file and creates a single Record +// from that row. +func (r *Reader) next1() bool { var recs []string recs, r.err = r.r.Read() if r.err != nil { + r.done = true if r.err == io.EOF { r.err = nil } @@ -132,8 +163,65 @@ func (r *Reader) Next() bool { r.validate(recs) r.read(recs) + r.cur = r.bld.NewRecord() - return r.err == nil + return true +} + +// nextall reads the whole CSV file into memory and creates one single +// Record from all the CSV rows. +func (r *Reader) nextall() bool { + defer func() { + r.done = true + }() + + var ( + recs [][]string + ) + + recs, r.err = r.r.ReadAll() + if r.err != nil { + return false + } + + for _, rec := range recs { + r.validate(rec) + r.read(rec) + } + r.cur = r.bld.NewRecord() + + return true +} + +// nextn reads n rows from the CSV file, where n is the chunk size, and creates +// a Record from these rows. 
+func (r *Reader) nextn() bool { + var ( + recs []string + n = 0 + ) + + for i := 0; i < r.chunk && !r.done; i++ { + recs, r.err = r.r.Read() + if r.err != nil { + r.done = true + break + } + + r.validate(recs) + r.read(recs) + n++ + } + + if r.err != nil { + r.done = true + if r.err == io.EOF { + r.err = nil + } + } + + r.cur = r.bld.NewRecord() + return n > 0 } func (r *Reader) validate(recs []string) { @@ -193,7 +281,6 @@ func (r *Reader) read(recs []string) { r.bld.Field(i).(*array.StringBuilder).Append(str) } } - r.cur = r.bld.NewRecord() } func (r *Reader) readI8(str string) int8 { diff --git a/go/arrow/csv/csv_test.go b/go/arrow/csv/csv_test.go index 534e8eabd3a97..97f31cc209c27 100644 --- a/go/arrow/csv/csv_test.go +++ b/go/arrow/csv/csv_test.go @@ -20,8 +20,6 @@ import ( "bytes" "fmt" "io/ioutil" - "log" - "os" "testing" "github.com/apache/arrow/go/arrow" @@ -30,17 +28,24 @@ import ( ) func Example() { - f, err := os.Open("testdata/simple.csv") - if err != nil { - log.Fatal(err) - } - defer f.Close() + f := bytes.NewBufferString(`## a simple set of data: int64;float64;string +0;0;str-0 +1;1;str-1 +2;2;str-2 +3;3;str-3 +4;4;str-4 +5;5;str-5 +6;6;str-6 +7;7;str-7 +8;8;str-8 +9;9;str-9 +`) schema := arrow.NewSchema( []arrow.Field{ - arrow.Field{Name: "i64", Type: arrow.PrimitiveTypes.Int64}, - arrow.Field{Name: "f64", Type: arrow.PrimitiveTypes.Float64}, - arrow.Field{Name: "str", Type: arrow.BinaryTypes.String}, + {Name: "i64", Type: arrow.PrimitiveTypes.Int64}, + {Name: "f64", Type: arrow.PrimitiveTypes.Float64}, + {Name: "str", Type: arrow.BinaryTypes.String}, }, nil, ) @@ -89,6 +94,59 @@ func Example() { // rec[2]["str"]: ["str-9"] } +func Example_withChunk() { + f := bytes.NewBufferString(`## a simple set of data: int64;float64;string +0;0;str-0 +1;1;str-1 +2;2;str-2 +3;3;str-3 +4;4;str-4 +5;5;str-5 +6;6;str-6 +7;7;str-7 +8;8;str-8 +9;9;str-9 +`) + + schema := arrow.NewSchema( + []arrow.Field{ + {Name: "i64", Type: arrow.PrimitiveTypes.Int64}, + {Name: "f64", Type: arrow.PrimitiveTypes.Float64}, + {Name: "str", Type: arrow.BinaryTypes.String}, + }, + nil, + ) + r := csv.NewReader( + f, schema, + csv.WithComment('#'), csv.WithComma(';'), + csv.WithChunk(3), + ) + defer r.Release() + + n := 0 + for r.Next() { + rec := r.Record() + for i, col := range rec.Columns() { + fmt.Printf("rec[%d][%q]: %v\n", i, rec.ColumnName(i), col) + } + n++ + } + + // Output: + // rec[0]["i64"]: [0 1 2] + // rec[1]["f64"]: [0 1 2] + // rec[2]["str"]: ["str-0" "str-1" "str-2"] + // rec[0]["i64"]: [3 4 5] + // rec[1]["f64"]: [3 4 5] + // rec[2]["str"]: ["str-3" "str-4" "str-5"] + // rec[0]["i64"]: [6 7 8] + // rec[1]["f64"]: [6 7 8] + // rec[2]["str"]: ["str-6" "str-7" "str-8"] + // rec[0]["i64"]: [9] + // rec[1]["f64"]: [9] + // rec[2]["str"]: ["str-9"] +} + func TestCSVReader(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) @@ -190,3 +248,318 @@ rec[11]["str"]: ["str-2"] r.Release() } } + +func TestCSVReaderWithChunk(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + raw, err := ioutil.ReadFile("testdata/simple.csv") + if err != nil { + t.Fatal(err) + } + + schema := arrow.NewSchema( + []arrow.Field{ + arrow.Field{Name: "i64", Type: arrow.PrimitiveTypes.Int64}, + arrow.Field{Name: "f64", Type: arrow.PrimitiveTypes.Float64}, + arrow.Field{Name: "str", Type: arrow.BinaryTypes.String}, + }, + nil, + ) + + for _, tc := range []struct { + name string + opts []csv.Option + records int + want string 
+ }{ + { + name: "chunk=default", + opts: []csv.Option{csv.WithAllocator(mem), csv.WithComment('#'), csv.WithComma(';')}, + records: 10, + want: `rec[0]["i64"]: [0] +rec[1]["f64"]: [0] +rec[2]["str"]: ["str-0"] +rec[0]["i64"]: [1] +rec[1]["f64"]: [1] +rec[2]["str"]: ["str-1"] +rec[0]["i64"]: [2] +rec[1]["f64"]: [2] +rec[2]["str"]: ["str-2"] +rec[0]["i64"]: [3] +rec[1]["f64"]: [3] +rec[2]["str"]: ["str-3"] +rec[0]["i64"]: [4] +rec[1]["f64"]: [4] +rec[2]["str"]: ["str-4"] +rec[0]["i64"]: [5] +rec[1]["f64"]: [5] +rec[2]["str"]: ["str-5"] +rec[0]["i64"]: [6] +rec[1]["f64"]: [6] +rec[2]["str"]: ["str-6"] +rec[0]["i64"]: [7] +rec[1]["f64"]: [7] +rec[2]["str"]: ["str-7"] +rec[0]["i64"]: [8] +rec[1]["f64"]: [8] +rec[2]["str"]: ["str-8"] +rec[0]["i64"]: [9] +rec[1]["f64"]: [9] +rec[2]["str"]: ["str-9"] +`, + }, + { + name: "chunk=0", + opts: []csv.Option{ + csv.WithAllocator(mem), csv.WithComment('#'), csv.WithComma(';'), + csv.WithChunk(0), + }, + records: 10, + want: `rec[0]["i64"]: [0] +rec[1]["f64"]: [0] +rec[2]["str"]: ["str-0"] +rec[0]["i64"]: [1] +rec[1]["f64"]: [1] +rec[2]["str"]: ["str-1"] +rec[0]["i64"]: [2] +rec[1]["f64"]: [2] +rec[2]["str"]: ["str-2"] +rec[0]["i64"]: [3] +rec[1]["f64"]: [3] +rec[2]["str"]: ["str-3"] +rec[0]["i64"]: [4] +rec[1]["f64"]: [4] +rec[2]["str"]: ["str-4"] +rec[0]["i64"]: [5] +rec[1]["f64"]: [5] +rec[2]["str"]: ["str-5"] +rec[0]["i64"]: [6] +rec[1]["f64"]: [6] +rec[2]["str"]: ["str-6"] +rec[0]["i64"]: [7] +rec[1]["f64"]: [7] +rec[2]["str"]: ["str-7"] +rec[0]["i64"]: [8] +rec[1]["f64"]: [8] +rec[2]["str"]: ["str-8"] +rec[0]["i64"]: [9] +rec[1]["f64"]: [9] +rec[2]["str"]: ["str-9"] +`, + }, + { + name: "chunk=1", + opts: []csv.Option{ + csv.WithAllocator(mem), csv.WithComment('#'), csv.WithComma(';'), + csv.WithChunk(1), + }, + records: 10, + want: `rec[0]["i64"]: [0] +rec[1]["f64"]: [0] +rec[2]["str"]: ["str-0"] +rec[0]["i64"]: [1] +rec[1]["f64"]: [1] +rec[2]["str"]: ["str-1"] +rec[0]["i64"]: [2] +rec[1]["f64"]: [2] +rec[2]["str"]: ["str-2"] +rec[0]["i64"]: [3] +rec[1]["f64"]: [3] +rec[2]["str"]: ["str-3"] +rec[0]["i64"]: [4] +rec[1]["f64"]: [4] +rec[2]["str"]: ["str-4"] +rec[0]["i64"]: [5] +rec[1]["f64"]: [5] +rec[2]["str"]: ["str-5"] +rec[0]["i64"]: [6] +rec[1]["f64"]: [6] +rec[2]["str"]: ["str-6"] +rec[0]["i64"]: [7] +rec[1]["f64"]: [7] +rec[2]["str"]: ["str-7"] +rec[0]["i64"]: [8] +rec[1]["f64"]: [8] +rec[2]["str"]: ["str-8"] +rec[0]["i64"]: [9] +rec[1]["f64"]: [9] +rec[2]["str"]: ["str-9"] +`, + }, + { + name: "chunk=3", + opts: []csv.Option{ + csv.WithAllocator(mem), csv.WithComment('#'), csv.WithComma(';'), + csv.WithChunk(3), + }, + records: 4, + want: `rec[0]["i64"]: [0 1 2] +rec[1]["f64"]: [0 1 2] +rec[2]["str"]: ["str-0" "str-1" "str-2"] +rec[0]["i64"]: [3 4 5] +rec[1]["f64"]: [3 4 5] +rec[2]["str"]: ["str-3" "str-4" "str-5"] +rec[0]["i64"]: [6 7 8] +rec[1]["f64"]: [6 7 8] +rec[2]["str"]: ["str-6" "str-7" "str-8"] +rec[0]["i64"]: [9] +rec[1]["f64"]: [9] +rec[2]["str"]: ["str-9"] +`, + }, + { + name: "chunk=6", + opts: []csv.Option{ + csv.WithAllocator(mem), csv.WithComment('#'), csv.WithComma(';'), + csv.WithChunk(6), + }, + records: 2, + want: `rec[0]["i64"]: [0 1 2 3 4 5] +rec[1]["f64"]: [0 1 2 3 4 5] +rec[2]["str"]: ["str-0" "str-1" "str-2" "str-3" "str-4" "str-5"] +rec[0]["i64"]: [6 7 8 9] +rec[1]["f64"]: [6 7 8 9] +rec[2]["str"]: ["str-6" "str-7" "str-8" "str-9"] +`, + }, + { + name: "chunk=10", + opts: []csv.Option{ + csv.WithAllocator(mem), csv.WithComment('#'), csv.WithComma(';'), + csv.WithChunk(10), + }, + records: 1, + want: `rec[0]["i64"]: 
[0 1 2 3 4 5 6 7 8 9] +rec[1]["f64"]: [0 1 2 3 4 5 6 7 8 9] +rec[2]["str"]: ["str-0" "str-1" "str-2" "str-3" "str-4" "str-5" "str-6" "str-7" "str-8" "str-9"] +`, + }, + { + name: "chunk=11", + opts: []csv.Option{ + csv.WithAllocator(mem), csv.WithComment('#'), csv.WithComma(';'), + csv.WithChunk(11), + }, + records: 1, + want: `rec[0]["i64"]: [0 1 2 3 4 5 6 7 8 9] +rec[1]["f64"]: [0 1 2 3 4 5 6 7 8 9] +rec[2]["str"]: ["str-0" "str-1" "str-2" "str-3" "str-4" "str-5" "str-6" "str-7" "str-8" "str-9"] +`, + }, + { + name: "chunk=-1", + opts: []csv.Option{ + csv.WithAllocator(mem), csv.WithComment('#'), csv.WithComma(';'), + csv.WithChunk(-1), + }, + records: 1, + want: `rec[0]["i64"]: [0 1 2 3 4 5 6 7 8 9] +rec[1]["f64"]: [0 1 2 3 4 5 6 7 8 9] +rec[2]["str"]: ["str-0" "str-1" "str-2" "str-3" "str-4" "str-5" "str-6" "str-7" "str-8" "str-9"] +`, + }, + } { + t.Run(tc.name, func(t *testing.T) { + r := csv.NewReader(bytes.NewReader(raw), schema, tc.opts...) + + defer r.Release() + + r.Retain() + r.Release() + + if got, want := r.Schema(), schema; !got.Equal(want) { + t.Fatalf("invalid schema: got=%v, want=%v", got, want) + } + + out := new(bytes.Buffer) + + n := 0 + for r.Next() { + rec := r.Record() + for i, col := range rec.Columns() { + fmt.Fprintf(out, "rec[%d][%q]: %v\n", i, rec.ColumnName(i), col) + } + n++ + } + + if got, want := n, tc.records; got != want { + t.Fatalf("invalid number of records: got=%d, want=%d", got, want) + } + + if got, want := out.String(), tc.want; got != want { + t.Fatalf("invalid output:\ngot:\n%s\nwant:\n%s\n", got, want) + } + + if r.Err() != nil { + t.Fatalf("unexpected error: %v", r.Err()) + } + }) + } +} + +func BenchmarkRead(b *testing.B) { + gen := func(rows, cols int) []byte { + buf := new(bytes.Buffer) + for i := 0; i < rows; i++ { + for j := 0; j < cols; j++ { + if j > 0 { + fmt.Fprintf(buf, ";") + } + fmt.Fprintf(buf, "%d;%f;str-%d", i, float64(i), i) + } + fmt.Fprintf(buf, "\n") + } + return buf.Bytes() + } + + for _, rows := range []int{10, 1e2, 1e3, 1e4, 1e5} { + for _, cols := range []int{1, 10, 100, 1000} { + raw := gen(rows, cols) + for _, chunks := range []int{-1, 0, 10, 100, 1000} { + b.Run(fmt.Sprintf("rows=%d cols=%d chunks=%d", rows, cols, chunks), func(b *testing.B) { + benchRead(b, raw, rows, cols, chunks) + }) + } + } + } +} + +func benchRead(b *testing.B, raw []byte, rows, cols, chunks int) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(b, 0) + + var fields []arrow.Field + for i := 0; i < cols; i++ { + fields = append(fields, []arrow.Field{ + arrow.Field{Name: fmt.Sprintf("i64-%d", i), Type: arrow.PrimitiveTypes.Int64}, + arrow.Field{Name: fmt.Sprintf("f64-%d", i), Type: arrow.PrimitiveTypes.Float64}, + arrow.Field{Name: fmt.Sprintf("str-%d", i), Type: arrow.BinaryTypes.String}, + }...) + } + + schema := arrow.NewSchema(fields, nil) + chunk := 0 + if chunks != 0 { + chunk = rows / chunks + } + opts := []csv.Option{ + csv.WithAllocator(mem), csv.WithComment('#'), csv.WithComma(';'), + csv.WithChunk(chunk), + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + r := csv.NewReader(bytes.NewReader(raw), schema, opts...) + + n := int64(0) + for r.Next() { + n += r.Record().NumRows() + } + + r.Release() + if n != int64(rows) { + b.Fatalf("invalid number of rows. 
want=%d, got=%d", n, rows) + } + } +} diff --git a/go/arrow/datatype_fixedwidth.go b/go/arrow/datatype_fixedwidth.go index 60cc98a4b97d9..444495058a591 100644 --- a/go/arrow/datatype_fixedwidth.go +++ b/go/arrow/datatype_fixedwidth.go @@ -37,6 +37,8 @@ type ( Time32 int32 Time64 int64 TimeUnit int + Date32 int32 + Date64 int64 ) const ( diff --git a/go/arrow/datatype_numeric.gen.go b/go/arrow/datatype_numeric.gen.go index 2ec4c4098a4a6..9b5dc835b1ea2 100644 --- a/go/arrow/datatype_numeric.gen.go +++ b/go/arrow/datatype_numeric.gen.go @@ -78,6 +78,18 @@ func (t *Float64Type) ID() Type { return FLOAT64 } func (t *Float64Type) Name() string { return "float64" } func (t *Float64Type) BitWidth() int { return 64 } +type Date32Type struct{} + +func (t *Date32Type) ID() Type { return DATE32 } +func (t *Date32Type) Name() string { return "date32" } +func (t *Date32Type) BitWidth() int { return 32 } + +type Date64Type struct{} + +func (t *Date64Type) ID() Type { return DATE64 } +func (t *Date64Type) Name() string { return "date64" } +func (t *Date64Type) BitWidth() int { return 64 } + var ( PrimitiveTypes = struct { Int8 DataType @@ -90,6 +102,8 @@ var ( Uint64 DataType Float32 DataType Float64 DataType + Date32 DataType + Date64 DataType }{ Int8: &Int8Type{}, @@ -102,5 +116,7 @@ var ( Uint64: &Uint64Type{}, Float32: &Float32Type{}, Float64: &Float64Type{}, + Date32: &Date32Type{}, + Date64: &Date64Type{}, } ) diff --git a/go/arrow/datatype_numeric.gen.go.tmpldata b/go/arrow/datatype_numeric.gen.go.tmpldata index 415b51b2e16bd..9badc6ee2b211 100644 --- a/go/arrow/datatype_numeric.gen.go.tmpldata +++ b/go/arrow/datatype_numeric.gen.go.tmpldata @@ -48,5 +48,15 @@ "Name": "Float64", "Type": "float64", "Size": 64 + }, + { + "Name": "Date32", + "Type": "date32", + "Size": 32 + }, + { + "Name": "Date64", + "Type": "date64", + "Size": 64 } ] diff --git a/go/arrow/numeric.tmpldata b/go/arrow/numeric.tmpldata index b9e976eea0534..45452ab4468c6 100644 --- a/go/arrow/numeric.tmpldata +++ b/go/arrow/numeric.tmpldata @@ -107,5 +107,23 @@ "Opt": { "Parametric": true } + }, + { + "Name": "Date32", + "name": "date32", + "Type": "Date32", + "QualifiedType": "arrow.Date32", + "InternalType": "int32", + "Default": "0", + "Size": "4" + }, + { + "Name": "Date64", + "name": "date64", + "Type": "Date64", + "QualifiedType": "arrow.Date64", + "InternalType": "int64", + "Default": "0", + "Size": "8" } -] \ No newline at end of file +] diff --git a/go/arrow/type_traits_numeric.gen.go b/go/arrow/type_traits_numeric.gen.go index 59ed13f541a53..14fafbc57659b 100644 --- a/go/arrow/type_traits_numeric.gen.go +++ b/go/arrow/type_traits_numeric.gen.go @@ -38,6 +38,8 @@ var ( TimestampTraits timestampTraits Time32Traits time32Traits Time64Traits time64Traits + Date32Traits date32Traits + Date64Traits date64Traits ) // Int64 traits @@ -663,3 +665,99 @@ func (time64Traits) CastToBytes(b []Time64) []byte { // Copy copies src to dst. func (time64Traits) Copy(dst, src []Time64) { copy(dst, src) } + +// Date32 traits + +const ( + // Date32SizeBytes specifies the number of bytes required to store a single Date32 in memory + Date32SizeBytes = int(unsafe.Sizeof(Date32(0))) +) + +type date32Traits struct{} + +// BytesRequired returns the number of bytes required to store n elements in memory. 
+func (date32Traits) BytesRequired(n int) int { return Date32SizeBytes * n }
+
+// PutValue writes the little-endian encoding of v into the first 4 bytes of b.
+func (date32Traits) PutValue(b []byte, v Date32) {
+	binary.LittleEndian.PutUint32(b, uint32(v))
+}
+
+// CastFromBytes reinterprets the slice b to a slice of type Date32.
+//
+// NOTE: len(b) must be a multiple of Date32SizeBytes.
+func (date32Traits) CastFromBytes(b []byte) []Date32 {
+	h := (*reflect.SliceHeader)(unsafe.Pointer(&b))
+
+	var res []Date32
+	s := (*reflect.SliceHeader)(unsafe.Pointer(&res))
+	s.Data = h.Data
+	s.Len = h.Len / Date32SizeBytes
+	s.Cap = h.Cap / Date32SizeBytes
+
+	return res
+}
+
+// CastToBytes reinterprets the slice b to a slice of bytes.
+func (date32Traits) CastToBytes(b []Date32) []byte {
+	h := (*reflect.SliceHeader)(unsafe.Pointer(&b))
+
+	var res []byte
+	s := (*reflect.SliceHeader)(unsafe.Pointer(&res))
+	s.Data = h.Data
+	s.Len = h.Len * Date32SizeBytes
+	s.Cap = h.Cap * Date32SizeBytes
+
+	return res
+}
+
+// Copy copies src to dst.
+func (date32Traits) Copy(dst, src []Date32) { copy(dst, src) }
+
+// Date64 traits
+
+const (
+	// Date64SizeBytes specifies the number of bytes required to store a single Date64 in memory
+	Date64SizeBytes = int(unsafe.Sizeof(Date64(0)))
+)
+
+type date64Traits struct{}
+
+// BytesRequired returns the number of bytes required to store n elements in memory.
+func (date64Traits) BytesRequired(n int) int { return Date64SizeBytes * n }
+
+// PutValue writes the little-endian encoding of v into the first 8 bytes of b.
+func (date64Traits) PutValue(b []byte, v Date64) {
+	binary.LittleEndian.PutUint64(b, uint64(v))
+}
+
+// CastFromBytes reinterprets the slice b to a slice of type Date64.
+//
+// NOTE: len(b) must be a multiple of Date64SizeBytes.
+func (date64Traits) CastFromBytes(b []byte) []Date64 {
+	h := (*reflect.SliceHeader)(unsafe.Pointer(&b))
+
+	var res []Date64
+	s := (*reflect.SliceHeader)(unsafe.Pointer(&res))
+	s.Data = h.Data
+	s.Len = h.Len / Date64SizeBytes
+	s.Cap = h.Cap / Date64SizeBytes
+
+	return res
+}
+
+// CastToBytes reinterprets the slice b to a slice of bytes.
+func (date64Traits) CastToBytes(b []Date64) []byte {
+	h := (*reflect.SliceHeader)(unsafe.Pointer(&b))
+
+	var res []byte
+	s := (*reflect.SliceHeader)(unsafe.Pointer(&res))
+	s.Data = h.Data
+	s.Len = h.Len * Date64SizeBytes
+	s.Cap = h.Cap * Date64SizeBytes
+
+	return res
+}
+
+// Copy copies src to dst.
+func (date64Traits) Copy(dst, src []Date64) { copy(dst, src) }
diff --git a/integration/dask/Dockerfile b/integration/dask/Dockerfile
new file mode 100644
index 0000000000000..5e054c51c561e
--- /dev/null
+++ b/integration/dask/Dockerfile
@@ -0,0 +1,31 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
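+#
+# Example usage (an illustrative sketch, not part of the build: it assumes
+# the arrow:python-3.6 base image referenced below has already been built,
+# and that the Arrow checkout is mounted at /arrow, which is what the CMD
+# at the bottom of this file expects):
+#
+#   docker build -t arrow:dask -f integration/dask/Dockerfile .
+#   docker run -v $(pwd):/arrow arrow:dask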
+
+FROM arrow:python-3.6
+
+# setup /etc/localtime
+RUN DEBIAN_FRONTEND=noninteractive \
+    apt-get install -y -q tzdata
+
+# install dask release from conda
+RUN conda install -c conda-forge dask pytest=3 && \
+    conda clean --all
+
+# build and test
+CMD arrow/ci/docker_build_cpp.sh && \
+    arrow/ci/docker_build_python.sh && \
+    arrow/integration/dask/runtest.sh
diff --git a/integration/dask/runtest.sh b/integration/dask/runtest.sh
new file mode 100755
index 0000000000000..baf9ccf4455db
--- /dev/null
+++ b/integration/dask/runtest.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+
+# check that optional pyarrow modules are available
+# because pytest would just skip the dask tests
+python -c "import pyarrow.orc"
+python -c "import pyarrow.parquet"
+
+# TODO(kszucs): the following tests also use pyarrow
+# pytest -sv --pyargs dask.bytes.tests.test_s3
+# pytest -sv --pyargs dask.bytes.tests.test_hdfs
+# pytest -sv --pyargs dask.bytes.tests.test_local
+
+# TODO(kszucs): re-enable it, for details see ARROW-3910
+# pytest -v --pyargs dask.dataframe.io.tests.test_orc
+pytest -v --pyargs dask.dataframe.io.tests.test_parquet
+pytest -v --pyargs dask.dataframe.tests.test_dataframe
diff --git a/integration/hdfs/Dockerfile b/integration/hdfs/Dockerfile
index a1d3e4eb0a598..4f72e254f06e3 100644
--- a/integration/hdfs/Dockerfile
+++ b/integration/hdfs/Dockerfile
@@ -15,63 +15,35 @@
 # specific language governing permissions and limitations
 # under the License.
 
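+# Both HDFS drivers that pyarrow can load are exercised by this image:
+# libhdfs (the JNI driver, which needs JAVA_HOME, HADOOP_HOME and the hadoop
+# CLASSPATH exported in runtest.sh) and libhdfs3 (the C++ driver, configured
+# through LIBHDFS3_CONF). As a hedged sketch of what the tests do internally,
+# using the pyarrow.hdfs API of this Arrow version (host and port below are
+# illustrative values, not something this Dockerfile defines):
+#
+#   import pyarrow as pa
+#   fs_jni = pa.hdfs.connect('hdfs', 9000, driver='libhdfs')    # JNI driver
+#   fs_cpp = pa.hdfs.connect('hdfs', 9000, driver='libhdfs3')   # C++ driver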
-FROM gelog/hadoop
+FROM arrow:python-3.6
 
-RUN apt-get update && \
-    apt-get install -y \
-        autoconf \
-        automake \
-        make \
-        gcc \
-        g++ \
-        git \
-        wget \
-        pkg-config \
-        ninja-build
-
-ENV CC=gcc \
-    CXX=g++ \
-    PATH=/opt/conda/bin:$PATH \
-    CONDA_PREFIX=/opt/conda
-
-# install dependencies
-ARG PYTHON_VERSION=3.6
-ADD ci/docker_install_conda.sh \
-    ci/conda_env_cpp.yml \
-    ci/conda_env_python.yml \
-    /arrow/ci/
-RUN arrow/ci/docker_install_conda.sh && \
-    conda install -c conda-forge \
-        --file arrow/ci/conda_env_cpp.yml \
-        --file arrow/ci/conda_env_python.yml \
-        python=$PYTHON_VERSION && \
-    conda clean --all
-
-# installing in the previous step boost=1.60 and boost-cpp=1.67 gets installed,
-# cmake finds 1.60 and parquet fails to compile
-# installing it in a separate step, boost=1.60 and boost-cpp=1.64 gets
-# installed, cmake finds 1.64
-# libhdfs3 needs to be pinned, see ARROW-1465 and ARROW-1445
+# installing libhdfs3, it needs to be pinned, see ARROW-1465 and ARROW-1445
 RUN conda install -y -c conda-forge hdfs3 libhdfs3=2.2.31 && \
     conda clean --all
 
+# installing libhdfs (JNI)
+ARG HADOOP_VERSION=2.6.5
+ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \
+    HADOOP_HOME=/usr/local/hadoop \
+    HADOOP_OPTS=-Djava.library.path=/usr/local/hadoop/lib/native \
+    PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
+RUN apt-get update -y && \
+    apt-get install -y openjdk-8-jdk && \
+    wget -q -O hadoop-$HADOOP_VERSION.tar.gz "https://www.apache.org/dyn/mirrors/mirrors.cgi?action=download&filename=hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz" && \
+    tar -zxf /hadoop-$HADOOP_VERSION.tar.gz && \
+    rm /hadoop-$HADOOP_VERSION.tar.gz && \
+    mv /hadoop-$HADOOP_VERSION /usr/local/hadoop
+ADD integration/hdfs/hdfs-site.xml $HADOOP_HOME/etc/hadoop/
+
 # build cpp with tests
-ENV ARROW_HDFS=ON \
+ENV CC=gcc \
+    CXX=g++ \
+    ARROW_ORC=ON \
+    ARROW_HDFS=ON \
     ARROW_PYTHON=ON \
-    ARROW_BUILD_TESTS=ON \
-    LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${HADOOP_HOME}/lib/native"
-ADD ci/docker_build_cpp.sh /arrow/ci/
-ADD cpp /arrow/cpp
-ADD format /arrow/format
-ADD java/pom.xml /arrow/java/pom.xml
-RUN arrow/ci/docker_build_cpp.sh
-
-# build python
-ADD ci/docker_build_python.sh /arrow/ci/
-ADD python /arrow/python
-RUN arrow/ci/docker_build_python.sh
+    ARROW_BUILD_TESTS=ON
 
-# execute integration tests
-ENV LIBHDFS3_CONF=/arrow/integration/hdfs/libhdfs3.xml
-ADD integration /arrow/integration
-CMD arrow/integration/hdfs/runtest.sh
+# build and test
+CMD arrow/ci/docker_build_cpp.sh && \
+    arrow/ci/docker_build_python.sh && \
+    arrow/integration/hdfs/runtest.sh
diff --git a/integration/hdfs/hdfs-site.xml b/integration/hdfs/hdfs-site.xml
new file mode 100644
index 0000000000000..a80b945a664b7
--- /dev/null
+++ b/integration/hdfs/hdfs-site.xml
@@ -0,0 +1,44 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<configuration>
+  <property>
+    <name>dfs.replication</name>
+    <value>2</value>
+  </property>
+  <property>
+    <name>dfs.datanode.data.dir</name>
+    <value>file:///data/dfs/data</value>
+  </property>
+  <property>
+    <name>dfs.namenode.name.dir</name>
+    <value>file:///data/dfs/name</value>
+  </property>
+  <property>
+    <name>dfs.namenode.checkpoint.dir</name>
+    <value>file:///data/dfs/namesecondary</value>
+  </property>
+  <property>
+    <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>dfs.default.replica</name>
+    <value>1</value>
+  </property>
+</configuration>
diff --git a/integration/hdfs/libhdfs3.xml b/integration/hdfs/libhdfs3.xml
deleted file mode 100644
index f929929b386da..0000000000000
--- a/integration/hdfs/libhdfs3.xml
+++ /dev/null
@@ -1,332 +0,0 @@
-<?xml version="1.0"?>
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
-
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-<configuration>
-
-  <property>
-    <name>rpc.client.timeout</name>
-    <value>3600000</value>
-    <description>
-      timeout interval of a RPC invocation in millisecond. default is 3600000.
-    </description>
-  </property>
-
-  <property>
-    <name>rpc.client.connect.tcpnodelay</name>
-    <value>true</value>
-    <description>
-      whether set socket TCP_NODELAY to true when connect to RPC server. default is true.
-    </description>
-  </property>
-
-  <property>
-    <name>rpc.client.max.idle</name>
-    <value>10000</value>
-    <description>
-      the max idle time of a RPC connection in millisecond. default is 10000.
-    </description>
-  </property>
-
-  <property>
-    <name>rpc.client.ping.interval</name>
-    <value>10000</value>
-    <description>
-      the interval which the RPC client send a heart beat to server. 0 means disable, default is 10000.
-    </description>
-  </property>
-
-  <property>
-    <name>rpc.client.connect.timeout</name>
-    <value>600000</value>
-    <description>
-      the timeout interval in millisecond when the RPC client is trying to setup the connection. default is 600000.
-    </description>
-  </property>
-
-  <property>
-    <name>rpc.client.connect.retry</name>
-    <value>10</value>
-    <description>
-      the max retry times if the RPC client fail to setup the connection to server. default is 10.
-    </description>
-  </property>
-
-  <property>
-    <name>rpc.client.read.timeout</name>
-    <value>3600000</value>
-    <description>
-      the timeout interval in millisecond when the RPC client is trying to read from server. default is 3600000.
-    </description>
-  </property>
-
-  <property>
-    <name>rpc.client.write.timeout</name>
-    <value>3600000</value>
-    <description>
-      the timeout interval in millisecond when the RPC client is trying to write to server. default is 3600000.
-    </description>
-  </property>
-
-  <property>
-    <name>rpc.client.socket.linger.timeout</name>
-    <value>-1</value>
-    <description>
-      set value to socket SO_LINGER when connect to RPC server. -1 means default OS value. default is -1.
-    </description>
-  </property>
-
-  <property>
-    <name>dfs.client.read.shortcircuit</name>
-    <value>false</value>
-    <description>
-      whether reading block file bypass datanode if the block and the client are on the same node. default is true.
-    </description>
-  </property>
-
-  <property>
-    <name>dfs.default.replica</name>
-    <value>1</value>
-    <description>
-      the default number of replica. default is 3.
-    </description>
-  </property>
-
-  <property>
-    <name>dfs.prefetchsize</name>
-    <value>10</value>
-    <description>
-      the default number of blocks which information will be prefetched. default is 10.
-    </description>
-  </property>
-
-  <property>
-    <name>dfs.client.failover.max.attempts</name>
-    <value>15</value>
-    <description>
-      if multiply namenodes are configured, it is the max retry times when the dfs client try to issue a RPC call. default is 15.
-    </description>
-  </property>
-
-  <property>
-    <name>dfs.default.blocksize</name>
-    <value>134217728</value>
-    <description>
-      default block size. default is 134217728.
-    </description>
-  </property>
-
-  <property>
-    <name>dfs.client.log.severity</name>
-    <value>INFO</value>
-    <description>
-      the minimal log severity level, valid values include FATAL, ERROR, INFO, DEBUG1, DEBUG2, DEBUG3. default is INFO.
-    </description>
-  </property>
-
-  <property>
-    <name>input.connect.timeout</name>
-    <value>600000</value>
-    <description>
-      the timeout interval in millisecond when the input stream is trying to setup the connection to datanode. default is 600000.
-    </description>
-  </property>
-
-  <property>
-    <name>input.read.timeout</name>
-    <value>3600000</value>
-    <description>
-      the timeout interval in millisecond when the input stream is trying to read from datanode. default is 3600000.
-    </description>
-  </property>
-
-  <property>
-    <name>input.write.timeout</name>
-    <value>3600000</value>
-    <description>
-      the timeout interval in millisecond when the input stream is trying to write to datanode. default is 3600000.
-    </description>
-  </property>
-
-  <property>
-    <name>input.localread.default.buffersize</name>
-    <value>2097152</value>
-    <description>
-      number of bytes of the buffer which is used to hold the data from block file and verify checksum.
-      it is only used when "dfs.client.read.shortcircuit" is set to true. default is 1048576.
-    </description>
-  </property>
-
-  <property>
-    <name>input.localread.blockinfo.cachesize</name>
-    <value>1000</value>
-    <description>
-      the size of block file path information cache. default is 1000.
-    </description>
-  </property>
-
-  <property>
-    <name>input.read.getblockinfo.retry</name>
-    <value>3</value>
-    <description>
-      the max retry times when the client fail to get block information from namenode. default is 3.
-    </description>
-  </property>
-
-  <property>
-    <name>output.replace-datanode-on-failure</name>
-    <value>false</value>
-    <description>
-      whether the client add new datanode into pipeline if the number of nodes in pipeline is less the specified number of replicas. default is false.
-    </description>
-  </property>
-
-  <property>
-    <name>output.default.chunksize</name>
-    <value>512</value>
-    <description>
-      the number of bytes of a chunk in pipeline. default is 512.
-    </description>
-  </property>
-
-  <property>
-    <name>output.default.packetsize</name>
-    <value>65536</value>
-    <description>
-      the number of bytes of a packet in pipeline. default is 65536.
-    </description>
-  </property>
-
-  <property>
-    <name>output.default.write.retry</name>
-    <value>10</value>
-    <description>
-      the max retry times when the client fail to setup the pipeline. default is 10.
-    </description>
-  </property>
-
-  <property>
-    <name>output.connect.timeout</name>
-    <value>600000</value>
-    <description>
-      the timeout interval in millisecond when the output stream is trying to setup the connection to datanode. default is 600000.
-    </description>
-  </property>
-
-  <property>
-    <name>output.read.timeout</name>
-    <value>3600000</value>
-    <description>
-      the timeout interval in millisecond when the output stream is trying to read from datanode. default is 3600000.
-    </description>
-  </property>
-
-  <property>
-    <name>output.write.timeout</name>
-    <value>3600000</value>
-    <description>
-      the timeout interval in millisecond when the output stream is trying to write to datanode. default is 3600000.
-    </description>
-  </property>
-
-  <property>
-    <name>output.packetpool.size</name>
-    <value>1024</value>
-    <description>
-      the max number of packets in a file's packet pool. default is 1024.
-    </description>
-  </property>
-
-  <property>
-    <name>output.close.timeout</name>
-    <value>900000</value>
-    <description>
-      the timeout interval in millisecond when close an output stream. default is 900000.
-    </description>
-  </property>
-
-  <property>
-    <name>dfs.domain.socket.path</name>
-    <value>/var/lib/hadoop-hdfs/dn_socket</value>
-    <description>
-      Optional.  This is a path to a UNIX domain socket that will be used for
-      communication between the DataNode and local HDFS clients.
-      If the string "_PORT" is present in this path, it will be replaced by the
-      TCP port of the DataNode.
-    </description>
-  </property>
-
-  <property>
-    <name>dfs.client.use.legacy.blockreader.local</name>
-    <value>false</value>
-    <description>
-      Legacy short-circuit reader implementation based on HDFS-2246 is used
-      if this configuration parameter is true.
-      This is for the platforms other than Linux
-      where the new implementation based on HDFS-347 is not available.
-    </description>
-  </property>
-
-</configuration>
diff --git a/integration/hdfs/runtest.sh b/integration/hdfs/runtest.sh
index a90eb93645369..44afb4a1888a4 100755
--- a/integration/hdfs/runtest.sh
+++ b/integration/hdfs/runtest.sh
@@ -17,12 +17,21 @@
 # specific language governing permissions and limitations
 # under the License.
 
+# exit on any error
 set -e
 
 export CLASSPATH=`$HADOOP_HOME/bin/hadoop classpath --glob`
+export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
+export LIBHDFS3_CONF=$HADOOP_CONF_DIR/hdfs-site.xml
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HADOOP_HOME/lib/native/
 
+# execute cpp tests
 pushd /build/cpp
-  debug/io-hdfs-test
+  debug/arrow-io-hdfs-test
 popd
 
-pytest -v --pyargs pyarrow
+# cannot use --pyargs with custom arguments like --hdfs or --only-hdfs, because
+# pytest ignores them, see https://github.com/pytest-dev/pytest/issues/3517
+export PYARROW_TEST_ONLY_HDFS=ON
+
+pytest -v --pyargs pyarrow.tests.test_hdfs
diff --git a/integration/integration_test.py b/integration/integration_test.py
index 8021aa643263e..0bced26f15acd 100644
--- a/integration/integration_test.py
+++ b/integration/integration_test.py
@@ -18,6 +18,7 @@
 from collections import OrderedDict
 import argparse
 import binascii
+import contextlib
 import glob
 import itertools
 import json
@@ -26,7 +27,9 @@
 import six
 import string
 import subprocess
+import sys
 import tempfile
+import traceback
 import uuid
 
 import errno
@@ -893,8 +896,8 @@ def generate_dictionary_case():
         dictionaries=[dict1, dict2])
 
 
-def get_generated_json_files():
-    temp_dir = tempfile.mkdtemp()
+def get_generated_json_files(tempdir=None):
+    tempdir = tempdir or tempfile.mkdtemp()
 
     def _temp_path():
         return
@@ -910,7 +913,7 @@ def _temp_path():
 
     generated_paths = []
     for file_obj in file_objs:
-        out_path = os.path.join(temp_dir, 'generated_' +
+        out_path = os.path.join(tempdir, 'generated_' +
                                 file_obj.name + '.json')
         file_obj.write(out_path)
         generated_paths.append(out_path)
@@ -924,17 +927,36 @@ class IntegrationRunner(object):
 
-    def __init__(self, json_files, testers, debug=False):
+    def __init__(self, json_files, testers, tempdir=None, debug=False):
         self.json_files = json_files
         self.testers = testers
-        self.temp_dir = tempfile.mkdtemp()
+        self.temp_dir = tempdir or tempfile.mkdtemp()
self.debug = debug def run(self): + failures = [] for producer, consumer in itertools.product( filter(lambda t: t.PRODUCER, self.testers), filter(lambda t: t.CONSUMER, self.testers)): - self._compare_implementations(producer, consumer) + try: + self._compare_implementations(producer, consumer) + except Exception: + traceback.print_exc() + failures.append((producer, consumer, sys.exc_info())) + return failures + + def run_flight(self): + failures = [] + servers = filter(lambda t: t.FLIGHT_SERVER, self.testers) + clients = filter(lambda t: (t.FLIGHT_CLIENT and t.CONSUMER), + self.testers) + for server, client in itertools.product(servers, clients): + try: + self._compare_flight_implementations(server, client) + except Exception: + traceback.print_exc() + failures.append((server, client, sys.exc_info())) + return failures def _compare_implementations(self, producer, consumer): print('##########################################################') @@ -950,10 +972,12 @@ def _compare_implementations(self, producer, consumer): name = os.path.splitext(os.path.basename(json_path))[0] + file_id = guid()[:8] + # Make the random access file print('-- Creating binary inputs') - producer_file_path = os.path.join(self.temp_dir, guid() + '_' + - name + '.json_to_arrow') + producer_file_path = os.path.join(self.temp_dir, file_id + '_' + + name + '.json_as_file') producer.json_to_file(json_path, producer_file_path) # Validate the file @@ -961,20 +985,55 @@ def _compare_implementations(self, producer, consumer): consumer.validate(json_path, producer_file_path) print('-- Validating stream') - producer_stream_path = os.path.join(self.temp_dir, guid() + '_' + - name + '.arrow_to_stream') - consumer_file_path = os.path.join(self.temp_dir, guid() + '_' + - name + '.stream_to_arrow') + producer_stream_path = os.path.join(self.temp_dir, file_id + '_' + + name + + '.producer_file_as_stream') + consumer_file_path = os.path.join(self.temp_dir, file_id + '_' + + name + + '.consumer_stream_as_file') producer.file_to_stream(producer_file_path, producer_stream_path) consumer.stream_to_file(producer_stream_path, consumer_file_path) consumer.validate(json_path, consumer_file_path) + def _compare_flight_implementations(self, producer, consumer): + print('##########################################################') + print( + '{0} serving, {1} requesting'.format(producer.name, consumer.name) + ) + print('##########################################################') + + for json_path in self.json_files: + print('==========================================================') + print('Testing file {0}'.format(json_path)) + print('==========================================================') + + name = os.path.splitext(os.path.basename(json_path))[0] + + file_id = guid()[:8] + + with producer.flight_server(): + # Have the client request the file + consumer_file_path = os.path.join( + self.temp_dir, + file_id + '_' + name + '.consumer_requested_file') + consumer.flight_request(producer.FLIGHT_PORT, + json_path, consumer_file_path) + + # Validate the file + print('-- Validating file') + consumer.validate(json_path, consumer_file_path) + + # TODO: also have the client upload the file + class Tester(object): PRODUCER = False CONSUMER = False + FLIGHT_SERVER = False + FLIGHT_CLIENT = False + FLIGHT_PORT = 31337 def __init__(self, debug=False): self.debug = debug @@ -991,10 +1050,20 @@ def file_to_stream(self, file_path, stream_path): def validate(self, json_path, arrow_path): raise NotImplementedError + def flight_server(self): + raise 
NotImplementedError + + def flight_request(self, port, json_path, arrow_path): + raise NotImplementedError + class JavaTester(Tester): PRODUCER = True CONSUMER = True + FLIGHT_SERVER = True + FLIGHT_CLIENT = True + + FLIGHT_PORT = 31338 _arrow_version = load_version_from_pom() ARROW_TOOLS_JAR = os.environ.get( @@ -1002,6 +1071,15 @@ class JavaTester(Tester): os.path.join(ARROW_HOME, 'java/tools/target/arrow-tools-{}-' 'jar-with-dependencies.jar'.format(_arrow_version))) + ARROW_FLIGHT_JAR = os.environ.get( + 'ARROW_FLIGHT_JAVA_INTEGRATION_JAR', + os.path.join(ARROW_HOME, + 'java/flight/target/arrow-flight-{}-' + 'jar-with-dependencies.jar'.format(_arrow_version))) + ARROW_FLIGHT_SERVER = ('org.apache.arrow.flight.example.integration.' + 'IntegrationTestServer') + ARROW_FLIGHT_CLIENT = ('org.apache.arrow.flight.example.integration.' + 'IntegrationTestClient') name = 'Java' @@ -1044,18 +1122,58 @@ def file_to_stream(self, file_path, stream_path): print(' '.join(cmd)) run_cmd(cmd) + def flight_request(self, port, json_path, arrow_path): + cmd = ['java', '-cp', self.ARROW_FLIGHT_JAR, + self.ARROW_FLIGHT_CLIENT, + '-port', str(port), + '-j', json_path, + '-a', arrow_path] + if self.debug: + print(' '.join(cmd)) + run_cmd(cmd) + + @contextlib.contextmanager + def flight_server(self): + cmd = ['java', '-cp', self.ARROW_FLIGHT_JAR, + self.ARROW_FLIGHT_SERVER, + '-port', str(self.FLIGHT_PORT)] + if self.debug: + print(' '.join(cmd)) + server = subprocess.Popen(cmd, stdout=subprocess.PIPE) + try: + output = server.stdout.readline().decode() + if not output.startswith("Server listening on localhost"): + raise RuntimeError( + "Flight-Java server did not start properly, output: " + + output) + yield + finally: + server.terminate() + server.wait(5) + class CPPTester(Tester): PRODUCER = True CONSUMER = True + FLIGHT_SERVER = True + FLIGHT_CLIENT = True EXE_PATH = os.environ.get( 'ARROW_CPP_EXE_PATH', os.path.join(ARROW_HOME, 'cpp/build/debug')) - CPP_INTEGRATION_EXE = os.path.join(EXE_PATH, 'json-integration-test') - STREAM_TO_FILE = os.path.join(EXE_PATH, 'stream-to-file') - FILE_TO_STREAM = os.path.join(EXE_PATH, 'file-to-stream') + CPP_INTEGRATION_EXE = os.path.join(EXE_PATH, 'arrow-json-integration-test') + STREAM_TO_FILE = os.path.join(EXE_PATH, 'arrow-stream-to-file') + FILE_TO_STREAM = os.path.join(EXE_PATH, 'arrow-file-to-stream') + + FLIGHT_PORT = 31337 + + FLIGHT_SERVER_CMD = [ + os.path.join(EXE_PATH, 'flight-test-integration-server'), + "-port", str(FLIGHT_PORT)] + FLIGHT_CLIENT_CMD = [ + os.path.join(EXE_PATH, 'flight-test-integration-client'), + "-host", "localhost"] name = 'C++' @@ -1095,6 +1213,33 @@ def file_to_stream(self, file_path, stream_path): print(cmd) os.system(cmd) + @contextlib.contextmanager + def flight_server(self): + if self.debug: + print(' '.join(self.FLIGHT_SERVER_CMD)) + server = subprocess.Popen(self.FLIGHT_SERVER_CMD, + stdout=subprocess.PIPE) + try: + output = server.stdout.readline().decode() + if not output.startswith("Server listening on localhost"): + raise RuntimeError( + "Flight-C++ server did not start properly, output: " + + output) + yield + finally: + server.terminate() + server.wait(5) + + def flight_request(self, port, json_path, arrow_path): + cmd = self.FLIGHT_CLIENT_CMD + [ + '-port=' + str(port), + '-path=' + json_path, + '-output=' + arrow_path + ] + if self.debug: + print(' '.join(cmd)) + subprocess.run(cmd) + class JSTester(Tester): PRODUCER = True @@ -1118,7 +1263,7 @@ def _run(self, exe_cmd, arrow_path=None, json_path=None, if json_path is not 
None: cmd.extend(['-j', json_path]) - cmd.extend(['--mode', command, '-t', 'es5', '-m', 'umd']) + cmd.extend(['--mode', command]) if self.debug: print(' '.join(cmd)) @@ -1162,17 +1307,32 @@ def get_static_json_files(): return glob.glob(glob_pattern) -def run_all_tests(debug=False): +def run_all_tests(run_flight=False, debug=False, tempdir=None): testers = [CPPTester(debug=debug), JavaTester(debug=debug), JSTester(debug=debug)] static_json_files = get_static_json_files() - generated_json_files = get_generated_json_files() + generated_json_files = get_generated_json_files(tempdir=tempdir) json_files = static_json_files + generated_json_files - runner = IntegrationRunner(json_files, testers, debug=debug) - runner.run() - print('-- All tests passed!') + runner = IntegrationRunner(json_files, testers, + tempdir=tempdir, debug=debug) + failures = [] + failures.extend(runner.run()) + if run_flight: + failures.extend(runner.run_flight()) + + print() + print('##########################################################') + if not failures: + print('-- All tests passed!') + else: + print('-- Tests completed, failures:') + for producer, consumer, exc_info in failures: + print("FAILED TEST:", producer.name, "producing, ", + consumer.name, "consuming") + traceback.print_exception(*exc_info) + print() def write_js_test_json(directory): @@ -1192,9 +1352,16 @@ def write_js_test_json(directory): parser.add_argument('--write_generated_json', dest='generated_json_path', action='store', default=False, help='Generate test JSON') + parser.add_argument('--run_flight', dest='run_flight', + action='store_true', default=False, + help='Run Flight integration tests') parser.add_argument('--debug', dest='debug', action='store_true', default=False, help='Run executables in debug mode as relevant') + parser.add_argument('--tempdir', dest='tempdir', + default=tempfile.mkdtemp(), + help=('Directory to use for writing ' + 'integration test temporary files')) args = parser.parse_args() if args.generated_json_path: try: @@ -1204,4 +1371,5 @@ def write_js_test_json(directory): raise write_js_test_json(args.generated_json_path) else: - run_all_tests(debug=args.debug) + run_all_tests(run_flight=args.run_flight, + debug=args.debug, tempdir=args.tempdir) diff --git a/integration/spark/2.4.0.patch b/integration/spark/2.4.0.patch new file mode 100644 index 0000000000000..7992e010f63b9 --- /dev/null +++ b/integration/spark/2.4.0.patch @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
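+#
+# The single hunk below appends the scalac option `-Xmax-classfile-name 128`
+# to Spark's root pom.xml (at line 2093), keeping generated .class file names
+# short enough for filesystems with restrictive filename length limits; see
+# SPARK-4820, referenced from integration/spark/Dockerfile. A sketch of the
+# resulting pom.xml fragment (surrounding args elided; exact placement is an
+# assumption based on the hunk context):
+#
+#   <args>
+#     ...
+#     <arg>-Xmax-classfile-name</arg>
+#     <arg>128</arg>
+#   </args>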
+
+diff --git a/pom.xml b/pom.xml
+index f0e5ed9c56..b30d4d61d6 100644
+--- a/pom.xml
++++ b/pom.xml
+@@ -2092,0 +2093,2 @@
++              <arg>-Xmax-classfile-name</arg>
++              <arg>128</arg>
diff --git a/integration/spark/Dockerfile b/integration/spark/Dockerfile
new file mode 100644
index 0000000000000..5c28cca0db447
--- /dev/null
+++ b/integration/spark/Dockerfile
@@ -0,0 +1,57 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM arrow:python-3.6
+
+# installing java and maven
+ARG MAVEN_VERSION=3.5.4
+ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \
+    MAVEN_HOME=/usr/local/maven \
+    M2_HOME=/root/.m2 \
+    PATH=/root/.m2/bin:/usr/local/maven/bin:$PATH
+RUN apt-get update -q -y && \
+    apt-get install -q -y openjdk-8-jdk && \
+    wget -q -O maven-$MAVEN_VERSION.tar.gz "https://www.apache.org/dyn/mirrors/mirrors.cgi?action=download&filename=maven/maven-3/$MAVEN_VERSION/binaries/apache-maven-$MAVEN_VERSION-bin.tar.gz" && \
+    tar -zxf /maven-$MAVEN_VERSION.tar.gz && \
+    rm /maven-$MAVEN_VERSION.tar.gz && \
+    mv /apache-maven-$MAVEN_VERSION /usr/local/maven
+
+# installing specific version of spark
+ARG SPARK_VERSION=2.4.0
+RUN mkdir /spark && \
+    cd /spark && \
+    wget -q -O spark.tar.gz https://github.com/apache/spark/archive/v$SPARK_VERSION.tar.gz && \
+    tar -xzf spark.tar.gz && \
+    rm spark.tar.gz
+
+# patching spark is required in order to:
+# - avoid too long filenames error https://issues.apache.org/jira/browse/SPARK-4820
+ADD integration/spark/$SPARK_VERSION.patch /arrow/integration/spark/$SPARK_VERSION.patch
+RUN cd /spark/spark-$SPARK_VERSION && \
+    patch -p1 < /arrow/integration/spark/$SPARK_VERSION.patch
+
+# build cpp (tests are disabled for this image)
+ENV CC=gcc \
+    CXX=g++ \
+    ARROW_PYTHON=ON \
+    ARROW_HDFS=ON \
+    ARROW_BUILD_TESTS=OFF
+
+# build and test
+CMD arrow/ci/docker_build_cpp.sh && \
+    arrow/ci/docker_build_python.sh && \
+    arrow/ci/docker_build_java.sh && \
+    arrow/integration/spark/runtest.sh
diff --git a/integration/spark/runtest.sh b/integration/spark/runtest.sh
new file mode 100755
index 0000000000000..173f69efb27b4
--- /dev/null
+++ b/integration/spark/runtest.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# exit on any error
+set -e
+
+SPARK_VERSION=${SPARK_VERSION:-2.4.0}
+
+# rsynced source directory to build java libs
+arrow_src=/build/java/arrow
+
+pushd $arrow_src/java
+  ARROW_VERSION=`mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version | sed -n -e '/^\[.*\]/ !{ /^[0-9]/ { p; q } }'`
+popd
+
+MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m -Dorg.slf4j.simpleLogger.defaultLogLevel=warn"
+
+# build Spark with Arrow
+pushd /spark/spark-${SPARK_VERSION}
+  # update Spark pom with the Arrow version just installed and build Spark, need package phase for pyspark
+  echo "Building Spark with Arrow $ARROW_VERSION"
+  mvn -q versions:set-property -Dproperty=arrow.version -DnewVersion=$ARROW_VERSION
+
+  build/mvn -DskipTests package -pl sql/core -pl assembly -am
+
+  SPARK_SCALA_TESTS=(
+    "org.apache.spark.sql.execution.arrow"
+    "org.apache.spark.sql.execution.vectorized.ColumnarBatchSuite"
+    "org.apache.spark.sql.execution.vectorized.ArrowColumnVectorSuite")
+
+  (echo "Testing Spark:"; IFS=$'\n'; echo "${SPARK_SCALA_TESTS[*]}")
+
+  # TODO: should be able to only build spark-sql tests with adding "-pl sql/core" but not currently working
+  build/mvn -Dtest=none -DwildcardSuites=$(IFS=,; echo "${SPARK_SCALA_TESTS[*]}") test
+
+  # Run pyarrow related Python tests only
+  echo "Testing PySpark:"
+  python/run-tests --modules pyspark-sql
+popd
diff --git a/java/Dockerfile b/java/Dockerfile
index 4ef9d28775e5f..d722e3d7ab82c 100644
--- a/java/Dockerfile
+++ b/java/Dockerfile
@@ -17,6 +17,10 @@
 
 FROM maven:3.5.2-jdk-8-slim
 
-WORKDIR /arrow/java
+# rsync is required to prevent the contamination of arrow directory
+# (mounted from the host)
+RUN apt-get update -y && apt-get install -y rsync
 
-CMD mvn test
+CMD arrow/ci/docker_build_java.sh && \
+    cd /build/java/arrow/java && \
+    mvn test
diff --git a/java/README.md b/java/README.md
index d4efe428c4264..c69ff88ffa260 100644
--- a/java/README.md
+++ b/java/README.md
@@ -45,23 +45,36 @@ mvn install -P gandiva -pl gandiva -am -Dgandiva.cpp.build.dir=../../debug
 
 This library is still in Alpha stages, and subject to API changes without
 deprecation warnings.
 
+## Java Code Style Guide
+
+Arrow Java follows the Google style guide [here][3] with the following
+differences:
+
+* Imports are grouped, from top to bottom, in this order: static imports,
+standard Java, org.\*, com.\*
+* Line length can be up to 120 characters
+* Operators for line wrapping are at end-of-line
+* Naming rules for methods, parameters, etc. have been relaxed
+* Disabled `NoFinalizer`, `OverloadMethodsDeclarationOrder`, and
+`VariableDeclarationUsageDistance` due to the existing code base. These rules
+should be followed when possible.
+
+Refer to `java/dev/checkstyle/checkstyle.xml` for rule specifics.
+
 ## Test Logging Configuration
 
 When running tests, Arrow Java uses the Logback logger with SLF4J. By default,
-Logback has a log level set to DEBUG. Besides setting this level
-programmatically, it can also be configured with a file named either
-"logback.xml" or "logback-test.xml" residing in the classpath. The file
-location can also be specified in the Maven command line with the following
-option `-Dlogback.configurationFile=file:<path-to-logback.xml>`. A sample
-logback.xml file is available in `java/dev` with a log level of ERROR. Arrow
-Java can be built with this file using the following command run in the project
-root directory:
+it uses the logback.xml present in the corresponding module's src/test/resources
+directory, which has the default log level set to INFO.
+Arrow Java can be built with an alternate logback configuration file using the
+following command run in the project root directory:
 
 ```bash
-mvn -Dlogback.configurationFile=file:`pwd`/dev/logback.xml
+mvn -Dlogback.configurationFile=file:<path-to-logback.xml>
 ```
 
 See [Logback Configuration][1] for more details.
 
 [1]: https://logback.qos.ch/manual/configuration.html
 [2]: https://github.com/apache/arrow/blob/master/cpp/README.md
+[3]: http://google.github.io/styleguide/javaguide.html
diff --git a/java/adapter/jdbc/pom.xml b/java/adapter/jdbc/pom.xml
index ab0b9b55a92f5..dc919502c5858 100644
--- a/java/adapter/jdbc/pom.xml
+++ b/java/adapter/jdbc/pom.xml
@@ -16,7 +16,7 @@
   <parent>
     <groupId>org.apache.arrow</groupId>
    <artifactId>arrow-java-root</artifactId>
-    <version>0.12.0-SNAPSHOT</version>
+    <version>0.13.0-SNAPSHOT</version>
    <relativePath>../../pom.xml</relativePath>
  </parent>
diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java
index fce3b476f8b41..e47bc9344fd0f 100644
--- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java
+++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java
@@ -89,7 +89,9 @@ public static VectorSchemaRoot sqlToArrow(Connection connection, String query, B
     Preconditions.checkArgument(query != null && query.length() > 0, "SQL query can not be null or empty");
     Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");
 
-    return sqlToArrow(connection, query, allocator, Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT));
+    JdbcToArrowConfig config =
+        new JdbcToArrowConfig(allocator, Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT));
+    return sqlToArrow(connection, query, config);
   }
 
   /**
@@ -115,8 +117,29 @@ public static VectorSchemaRoot sqlToArrow(
     Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");
     Preconditions.checkNotNull(calendar, "Calendar object can not be null");
 
+    return sqlToArrow(connection, query, new JdbcToArrowConfig(allocator, calendar));
+  }
+
+  /**
+   * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow objects.
+   *
+   * @param connection Database connection to be used. This method will not close the passed connection object.
+   *                   Since the caller has passed the connection object it's the responsibility of the caller
+   *                   to close or return the connection to the pool.
+   * @param query      The DB Query to fetch the data.
+   * @param config     Configuration
+   * @return Arrow Data Objects {@link VectorSchemaRoot}
+   * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources opened such as
+   *                      ResultSet and Statement objects.
+ */ + public static VectorSchemaRoot sqlToArrow(Connection connection, String query, JdbcToArrowConfig config) + throws SQLException, IOException { + Preconditions.checkNotNull(connection, "JDBC connection object can not be null"); + Preconditions.checkArgument(query != null && query.length() > 0, "SQL query can not be null or empty"); + Preconditions.checkNotNull(config, "The configuration cannot be null"); + try (Statement stmt = connection.createStatement()) { - return sqlToArrow(stmt.executeQuery(query), allocator, calendar); + return sqlToArrow(stmt.executeQuery(query), config); } } @@ -147,7 +170,9 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator all Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null"); Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null"); - return sqlToArrow(resultSet, allocator, null); + JdbcToArrowConfig config = + new JdbcToArrowConfig(allocator, Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT)); + return sqlToArrow(resultSet, config); } /** @@ -161,10 +186,7 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator all public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar) throws SQLException, IOException { Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null"); - RootAllocator rootAllocator = new RootAllocator(Integer.MAX_VALUE); - VectorSchemaRoot root = sqlToArrow(resultSet, rootAllocator, calendar); - - return root; + return sqlToArrow(resultSet, new JdbcToArrowConfig(new RootAllocator(Integer.MAX_VALUE), calendar)); } /** @@ -181,9 +203,25 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator all Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null"); Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null"); + return sqlToArrow(resultSet, new JdbcToArrowConfig(allocator, calendar)); + } + + /** + * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. + * + * @param resultSet ResultSet to use to fetch the data from underlying database + * @param config Configuration of the conversion from JDBC to Arrow. + * @return Arrow Data Objects {@link VectorSchemaRoot} + * @throws SQLException on error + */ + public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, JdbcToArrowConfig config) + throws SQLException, IOException { + Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null"); + Preconditions.checkNotNull(config, "The configuration cannot be null"); + VectorSchemaRoot root = VectorSchemaRoot.create( - JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), calendar), allocator); - JdbcToArrowUtils.jdbcToArrowVectors(resultSet, root, calendar); + JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), config), config.getAllocator()); + JdbcToArrowUtils.jdbcToArrowVectors(resultSet, root, config); return root; } } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java new file mode 100644 index 0000000000000..59813a830cbed --- /dev/null +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc;
+
+import java.util.Calendar;
+
+import org.apache.arrow.memory.BaseAllocator;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * This class configures the JDBC-to-Arrow conversion process.
+ * <p>
+ * The allocator is used to construct the {@link org.apache.arrow.vector.VectorSchemaRoot},
+ * and the calendar is used to define the time zone of any
+ * {@link org.apache.arrow.vector.types.pojo.ArrowType.Timestamp}
+ * fields that are created during the conversion.
+ * </p>
+ * <p>
+ * Neither field may be null.
+ * </p>
+ */
+public final class JdbcToArrowConfig {
+  private Calendar calendar;
+  private BaseAllocator allocator;
+
+  /**
+   * Constructs a new configuration from the provided allocator and calendar.  The allocator
+   * is used when constructing the Arrow vectors from the ResultSet, and the calendar is used to define
+   * Arrow Timestamp fields, and to read time-based fields from the JDBC ResultSet.
+   *
+   * @param allocator The memory allocator to construct the Arrow vectors with.
+   * @param calendar The calendar to use when constructing Timestamp fields and reading time-based results.
+   */
+  JdbcToArrowConfig(BaseAllocator allocator, Calendar calendar) {
+    Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");
+    Preconditions.checkNotNull(calendar, "Calendar object can not be null");
+
+    this.allocator = allocator;
+    this.calendar = calendar;
+  }
+
+  /**
+   * The calendar to use when defining Arrow Timestamp fields
+   * and retrieving time-based fields from the database.
+   * @return the calendar.
+   */
+  public Calendar getCalendar() {
+    return calendar;
+  }
+
+  /**
+   * The Arrow memory allocator.
+   * @return the allocator.
+   */
+  public BaseAllocator getAllocator() {
+    return allocator;
+  }
+}
diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
new file mode 100644
index 0000000000000..df97c3a975196
--- /dev/null
+++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc;
+
+import java.util.Calendar;
+
+import org.apache.arrow.memory.BaseAllocator;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * This class builds {@link JdbcToArrowConfig}s.
+ */
+public class JdbcToArrowConfigBuilder {
+  private Calendar calendar;
+  private BaseAllocator allocator;
+
+  /**
+   * Default constructor for the {@link JdbcToArrowConfigBuilder}.
+   * Use the setter methods for the allocator and calendar; both must be
+   * set.  Otherwise, {@link #build()} will throw a {@link NullPointerException}.
+   */
+  public JdbcToArrowConfigBuilder() {
+    this.allocator = null;
+    this.calendar = null;
+  }
+
+  /**
+   * Constructor for the JdbcToArrowConfigBuilder.  Both the
+   * allocator and calendar are required.  A {@link NullPointerException}
+   * will be thrown if one of the arguments is null.
+   * <p>
+   * The allocator is used to construct Arrow vectors from the JDBC ResultSet.
+   * The calendar is used to determine the time zone of {@link java.sql.Timestamp}
+   * fields and convert {@link java.sql.Date}, {@link java.sql.Time}, and
+   * {@link java.sql.Timestamp} fields to a single, common time zone when reading
+   * from the result set.
+   * </p>
+   *
+   * @param allocator The Arrow Vector memory allocator.
+   * @param calendar The calendar to use when constructing timestamp fields.
+   */
+  public JdbcToArrowConfigBuilder(BaseAllocator allocator, Calendar calendar) {
+    this();
+
+    Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");
+    Preconditions.checkNotNull(calendar, "Calendar object can not be null");
+
+    this.allocator = allocator;
+    this.calendar = calendar;
+  }
+
+  /**
+   * Sets the memory allocator to use when constructing the Arrow vectors from the ResultSet.
+   *
+   * @param allocator the allocator to set.
+   * @exception NullPointerException if allocator is null.
+   */
+  public JdbcToArrowConfigBuilder setAllocator(BaseAllocator allocator) {
+    Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");
+    this.allocator = allocator;
+    return this;
+  }
+
+  /**
+   * Sets the {@link Calendar} to use when constructing timestamp fields in the
+   * Arrow schema, and reading time-based fields from the JDBC ResultSet.
+   *
+   * @param calendar the calendar to set.
+   * @exception NullPointerException if calendar is null.
+   */
+  public JdbcToArrowConfigBuilder setCalendar(Calendar calendar) {
+    Preconditions.checkNotNull(calendar, "Calendar object can not be null");
+    this.calendar = calendar;
+    return this;
+  }
+
+  /**
+   * This builds the {@link JdbcToArrowConfig} from the provided
+   * {@link BaseAllocator} and {@link Calendar}.
+   *
+   * @return The built {@link JdbcToArrowConfig}
+   * @throws NullPointerException if either the allocator or calendar was not set.
+   */
+  public JdbcToArrowConfig build() {
+    return new JdbcToArrowConfig(allocator, calendar);
+  }
+}
diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
index a6798c63cd021..b6adbbc7334a4 100644
--- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
+++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
@@ -38,6 +38,7 @@
 import java.util.Calendar;
 import java.util.List;
 
+import org.apache.arrow.memory.RootAllocator;
 import org.apache.arrow.vector.BaseFixedWidthVector;
 import org.apache.arrow.vector.BigIntVector;
 import org.apache.arrow.vector.BitVector;
@@ -90,6 +91,21 @@ public class JdbcToArrowUtils {
   private static final int DEFAULT_STREAM_BUFFER_SIZE = 1024;
   private static final int DEFAULT_CLOB_SUBSTRING_READ_SIZE = 256;
 
+  /**
+   * Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}.
+   *
+   * @param rsmd The ResultSetMetaData containing the results, to read the JDBC metadata from.
+   * @param calendar The calendar to use the time zone field of, to construct Timestamp fields from.
+   * @return {@link Schema}
+   * @throws SQLException on error
+   */
+  public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar) throws SQLException {
+    Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null");
+    Preconditions.checkNotNull(calendar, "Calendar object can't be null");
+
+    return jdbcToArrowSchema(rsmd, new JdbcToArrowConfig(new RootAllocator(0), calendar));
+  }
+
   /**
    * Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}.
    *
@@ -120,20 +136,14 @@ public class JdbcToArrowUtils {
    *  CLOB --> ArrowType.Utf8
    *  BLOB --> ArrowType.Binary
    *
-   * <p>If a {@link java.util.Calendar} is set, {@link java.sql.Timestamp} fields in the {@link java.sql.ResultSet} will
-   * be converted to an Arrow {@link org.apache.arrow.vector.TimeStampVector} using the Calendar's time
-   * zone.  If the Calendar is null, no time zone will be set on the
-   * TimeStampVector.
-   *
-   * @param rsmd ResultSetMetaData
+   * @param rsmd The ResultSetMetaData containing the results, to read the JDBC metadata from.
+   * @param config The configuration to use when constructing the schema.
    * @return {@link Schema}
    * @throws SQLException on error
    */
-  public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar) throws SQLException {
-
+  public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig config) throws SQLException {
     Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null");
-
-    final String tz = (calendar != null) ? calendar.getTimeZone().getID() : null;
+    Preconditions.checkNotNull(config, "The configuration object must not be null");
 
     List<Field> fields = new ArrayList<>();
     int columnCount = rsmd.getColumnCount();
@@ -184,8 +194,8 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar
           fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Time(TimeUnit.MILLISECOND, 32)), null));
           break;
         case Types.TIMESTAMP:
-          fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MILLISECOND, tz)),
-              null));
+          fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MILLISECOND,
+              config.getCalendar().getTimeZone().getID())), null));
           break;
         case Types.BINARY:
         case Types.VARBINARY:
@@ -228,8 +238,8 @@ private static void allocateVectors(VectorSchemaRoot root, int size) {
    * Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate
    * the given Arrow Vector objects.
    *
-   * @param rs ResultSet to use to fetch the data from underlying database
-   * @param root Arrow {@link VectorSchemaRoot} object to populate
+   * @param rs       ResultSet to use to fetch the data from underlying database
+   * @param root     Arrow {@link VectorSchemaRoot} object to populate
    * @param calendar The calendar to use when reading {@link Date}, {@link Time}, or {@link Timestamp}
    *                 data types from the {@link ResultSet}, or null if not converting.
    * @throws SQLException on error
@@ -240,11 +250,32 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calen
     Preconditions.checkNotNull(rs, "JDBC ResultSet object can't be null");
     Preconditions.checkNotNull(root, "JDBC ResultSet object can't be null");
 
+    jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar));
+  }
+
+  /**
+   * Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate
+   * the given Arrow Vector objects.
+   *
+   * @param rs     ResultSet to use to fetch the data from underlying database
+   * @param root   Arrow {@link VectorSchemaRoot} object to populate
+   * @param config The configuration to use when reading the data.
+ * @throws SQLException on error + * @throws IOException on error + */ + public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, JdbcToArrowConfig config) + throws SQLException, IOException { + + Preconditions.checkNotNull(rs, "JDBC ResultSet object can't be null"); + Preconditions.checkNotNull(root, "Arrow VectorSchemaRoot object can't be null"); + Preconditions.checkNotNull(config, "JDBC-to-Arrow configuration cannot be null"); + ResultSetMetaData rsmd = rs.getMetaData(); int columnCount = rsmd.getColumnCount(); allocateVectors(root, DEFAULT_BUFFER_SIZE); + final Calendar calendar = config.getCalendar(); + int rowCount = 0; while (rs.next()) { for (int i = 1; i <= columnCount; i++) { diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java index a147babc4524d..b1a93291d2be7 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java @@ -45,7 +45,7 @@ public abstract class AbstractJdbcToArrowTest { * @return Table object * @throws IOException on error */ - protected static Table getTable(String ymlFilePath, Class clss) throws IOException { + protected static Table getTable(String ymlFilePath, @SuppressWarnings("rawtypes") Class clss) throws IOException { return new ObjectMapper(new YAMLFactory()).readValue( clss.getClassLoader().getResourceAsStream(ymlFilePath), Table.class); } @@ -94,7 +94,7 @@ public void destroy() throws SQLException { * @throws ClassNotFoundException on error * @throws IOException on error */ - public static Object[][] prepareTestData(String[] testFiles, Class clss) + public static Object[][] prepareTestData(String[] testFiles, @SuppressWarnings("rawtypes") Class clss) throws SQLException, ClassNotFoundException, IOException { Object[][] tableArr = new Object[testFiles.length][]; int i = 0; diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java new file mode 100644 index 0000000000000..b4f92fa417026 --- /dev/null +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.arrow.adapter.jdbc; + +import static org.junit.Assert.assertSame; + +import java.util.Calendar; +import java.util.Locale; +import java.util.TimeZone; + +import org.apache.arrow.memory.BaseAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.junit.Test; + +public class JdbcToArrowConfigTest { + + private static final RootAllocator allocator = new RootAllocator(Integer.MAX_VALUE); + private static final Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT); + + @Test(expected = NullPointerException.class) + public void testConfigNullArguments() { + new JdbcToArrowConfig(null, null); + } + + @Test(expected = NullPointerException.class) + public void testBuilderNullArguments() { + new JdbcToArrowConfigBuilder(null, null); + } + + @Test(expected = NullPointerException.class) + public void testConfigNullCalendar() { + new JdbcToArrowConfig(allocator, null); + } + + @Test(expected = NullPointerException.class) + public void testBuilderNullCalendar() { + new JdbcToArrowConfigBuilder(allocator, null); + } + + @Test(expected = NullPointerException.class) + public void testConfigNullAllocator() { + new JdbcToArrowConfig(null, calendar); + } + + @Test(expected = NullPointerException.class) + public void testBuilderNullAllocator() { + new JdbcToArrowConfigBuilder(null, calendar); + } + + @Test(expected = NullPointerException.class) + public void testSetNullAllocator() { + JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, calendar); + builder.setAllocator(null); + } + + @Test(expected = NullPointerException.class) + public void testSetNullCalendar() { + JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, calendar); + builder.setCalendar(null); + } + + @Test + public void testConfig() { + JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, calendar); + JdbcToArrowConfig config = builder.build(); + + assertSame(allocator, config.getAllocator()); + assertSame(calendar, config.getCalendar()); + + Calendar newCalendar = Calendar.getInstance(); + BaseAllocator newAllocator = new RootAllocator(Integer.SIZE); + + builder.setAllocator(newAllocator).setCalendar(newCalendar); + config = builder.build(); + + assertSame(newAllocator, config.getAllocator()); + assertSame(newCalendar, config.getCalendar()); + } +} diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java index c7dff431da650..d33c07a075e81 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java @@ -31,6 +31,7 @@ import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.JdbcToArrow; +import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; import org.apache.arrow.adapter.jdbc.Table; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.VarCharVector; @@ -116,6 +117,13 @@ public void testJdbcToArroValues() throws SQLException, IOException { new RootAllocator(Integer.MAX_VALUE))); testDataSets(JdbcToArrow.sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance())); + testDataSets(JdbcToArrow.sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE),
Calendar.getInstance()).build())); + testDataSets(JdbcToArrow.sqlToArrow( + conn, + table.getQuery(), + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build())); } /** diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java index f6cd7645e0cac..5bdb38ff8be9f 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java @@ -40,6 +40,7 @@ import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.JdbcToArrow; +import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; import org.apache.arrow.adapter.jdbc.Table; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BigIntVector; @@ -142,6 +143,13 @@ public void testJdbcToArroValues() throws SQLException, IOException { testDataSets(JdbcToArrow.sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), new RootAllocator(Integer.MAX_VALUE))); testDataSets(JdbcToArrow.sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance())); + testDataSets(JdbcToArrow.sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build())); + testDataSets(JdbcToArrow.sqlToArrow( + conn, + table.getQuery(), + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build())); } /** diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java index 7933732f014b0..629bcfeaed304 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java @@ -27,6 +27,7 @@ import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.JdbcToArrow; +import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; import org.apache.arrow.adapter.jdbc.Table; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BigIntVector; @@ -99,6 +100,13 @@ public void testJdbcToArroValues() throws SQLException, IOException { testDataSets(JdbcToArrow.sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), new RootAllocator(Integer.MAX_VALUE))); testDataSets(JdbcToArrow.sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance())); + testDataSets(JdbcToArrow.sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build())); + testDataSets(JdbcToArrow.sqlToArrow( + conn, + table.getQuery(), + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build())); } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java index 4cbfeafb0a531..f74e683d7d753 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java +++ 
b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java @@ -48,6 +48,7 @@ import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.JdbcToArrow; +import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; import org.apache.arrow.adapter.jdbc.Table; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BigIntVector; @@ -133,6 +134,13 @@ public void testJdbcToArroValues() throws SQLException, IOException { new RootAllocator(Integer.MAX_VALUE))); testDataSets(JdbcToArrow.sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance())); + testDataSets(JdbcToArrow.sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build())); + testDataSets(JdbcToArrow.sqlToArrow( + conn, + table.getQuery(), + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build())); } /** diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java index 93dc10477f697..fee56c7c07e91 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java @@ -30,6 +30,7 @@ import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.JdbcToArrow; +import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; import org.apache.arrow.adapter.jdbc.Table; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.DateMilliVector; @@ -105,6 +106,17 @@ public void testJdbcToArroValues() throws SQLException, IOException { new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone())))); testDataSets(JdbcToArrow.sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone())))); + testDataSets(JdbcToArrow.sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))).build())); + testDataSets(JdbcToArrow.sqlToArrow( + conn, + table.getQuery(), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))).build())); } /** diff --git a/java/dev/logback.xml b/java/adapter/jdbc/src/test/resources/logback.xml similarity index 84% rename from java/dev/logback.xml rename to java/adapter/jdbc/src/test/resources/logback.xml index 10d54806b184a..4c54d18a210ff 100644 --- a/java/dev/logback.xml +++ b/java/adapter/jdbc/src/test/resources/logback.xml @@ -1,3 +1,4 @@ + - - - + @@ -23,7 +20,9 @@ $ mvn -Dlogback.configurationFile=file:${ARROW_HOME}/java/dev/logback.xml test - + + - + + diff --git a/java/flight/pom.xml b/java/flight/pom.xml index c6de29f8d005d..48939df886fd4 100644 --- a/java/flight/pom.xml +++ b/java/flight/pom.xml @@ -11,7 +11,7 @@ org.apache.arrow arrow-java-root - 0.12.0-SNAPSHOT + 0.13.0-SNAPSHOT arrow-flight @@ -48,19 +48,16 @@ io.grpc grpc-netty ${dep.grpc.version} - provided io.grpc grpc-core ${dep.grpc.version} - provided io.grpc grpc-protobuf ${dep.grpc.version} - provided io.netty @@ -75,11 +72,15 @@ 
com.google.guava guava + + commons-cli + commons-cli + 1.4 + io.grpc grpc-stub ${dep.grpc.version} - provided com.google.protobuf @@ -103,6 +104,10 @@ org.slf4j slf4j-api + + javax.annotation + javax.annotation-api + @@ -214,13 +219,50 @@ analyze-only - io.netty:netty-tcnative-boringssl-static:* + + org.codehaus.mojo + build-helper-maven-plugin + 1.9.1 + + + add-generated-sources-to-classpath + generate-sources + + add-source + + + + ${project.build.directory}/generated-sources/protobuf + + + + + + + maven-assembly-plugin + 3.0.0 + + + jar-with-dependencies + + + + + make-assembly + package + + single + + + + diff --git a/java/flight/src/main/java/org/apache/arrow/flight/ArrowMessage.java b/java/flight/src/main/java/org/apache/arrow/flight/ArrowMessage.java index 9764ff39a4a19..d2f7bb6c713b5 100644 --- a/java/flight/src/main/java/org/apache/arrow/flight/ArrowMessage.java +++ b/java/flight/src/main/java/org/apache/arrow/flight/ArrowMessage.java @@ -22,6 +22,8 @@ import java.io.InputStream; import java.io.OutputStream; import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import org.apache.arrow.flatbuf.Message; @@ -52,10 +54,12 @@ import io.grpc.MethodDescriptor.Marshaller; import io.grpc.internal.ReadableBuffer; import io.grpc.protobuf.ProtoUtils; + import io.netty.buffer.ArrowBuf; import io.netty.buffer.ByteBuf; import io.netty.buffer.ByteBufInputStream; import io.netty.buffer.CompositeByteBuf; +import io.netty.buffer.Unpooled; /** * The in-memory representation of FlightData used to manage a stream of Arrow messages. @@ -95,6 +99,18 @@ public static HeaderType getHeader(byte b) { } + // Pre-allocated buffers for padding serialized ArrowMessages. + private static List<ByteBuf> PADDING_BUFFERS = Arrays.asList( + null, + Unpooled.copiedBuffer(new byte[] { 0 }), + Unpooled.copiedBuffer(new byte[] { 0, 0 }), + Unpooled.copiedBuffer(new byte[] { 0, 0, 0 }), + Unpooled.copiedBuffer(new byte[] { 0, 0, 0, 0 }), + Unpooled.copiedBuffer(new byte[] { 0, 0, 0, 0, 0 }), + Unpooled.copiedBuffer(new byte[] { 0, 0, 0, 0, 0, 0 }), + Unpooled.copiedBuffer(new byte[] { 0, 0, 0, 0, 0, 0, 0 }) + ); + private final FlightDescriptor descriptor; private final Message message; private final List<ArrowBuf> bufs; @@ -253,8 +269,17 @@ private InputStream asInputStream(BufferAllocator allocator) { cos.writeTag(FlightData.DATA_BODY_FIELD_NUMBER, WireFormat.WIRETYPE_LENGTH_DELIMITED); int size = 0; + List<ByteBuf> allBufs = new ArrayList<>(); for (ArrowBuf b : bufs) { + allBufs.add(b); size += b.readableBytes(); + // [ARROW-4213] These buffers must be aligned to an 8-byte boundary in order to be readable from C++. + if (b.readableBytes() % 8 != 0) { + int paddingBytes = 8 - (b.readableBytes() % 8); + assert paddingBytes > 0 && paddingBytes < 8; + size += paddingBytes; + allBufs.add(PADDING_BUFFERS.get(paddingBytes).retain()); + } } // rawvarint is used for length definition.
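+      // At this point "size" includes the padding, e.g. a 17-byte buffer
+      // contributes 24 bytes (17 data + 7 zeros), so every buffer starts on an
+      // 8-byte boundary when the stream is read back.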
cos.writeUInt32NoTag(size); @@ -263,7 +288,7 @@ private InputStream asInputStream(BufferAllocator allocator) { ArrowBuf initialBuf = allocator.buffer(baos.size()); initialBuf.writeBytes(baos.toByteArray()); final CompositeByteBuf bb = new CompositeByteBuf(allocator.getAsByteBufAllocator(), true, bufs.size() + 1, - ImmutableList.<ByteBuf>builder().add(initialBuf).addAll(bufs).build()); + ImmutableList.<ByteBuf>builder().add(initialBuf).addAll(allBufs).build()); final ByteBufInputStream is = new DrainableByteBufInputStream(bb); return is; } catch (Exception ex) { diff --git a/java/flight/src/main/java/org/apache/arrow/flight/FlightInfo.java b/java/flight/src/main/java/org/apache/arrow/flight/FlightInfo.java index 5e7aad178e70d..9accbbe434a10 100644 --- a/java/flight/src/main/java/org/apache/arrow/flight/FlightInfo.java +++ b/java/flight/src/main/java/org/apache/arrow/flight/FlightInfo.java @@ -17,13 +17,22 @@ package org.apache.arrow.flight; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; import java.util.List; import java.util.stream.Collectors; import org.apache.arrow.flight.impl.Flight; import org.apache.arrow.flight.impl.Flight.FlightGetInfo; +import org.apache.arrow.vector.ipc.ReadChannel; +import org.apache.arrow.vector.ipc.WriteChannel; +import org.apache.arrow.vector.ipc.message.MessageSerializer; import org.apache.arrow.vector.types.pojo.Schema; +import com.fasterxml.jackson.databind.util.ByteBufferBackedInputStream; + import com.google.common.collect.ImmutableList; import com.google.protobuf.ByteString; @@ -45,8 +54,15 @@ public FlightInfo(Schema schema, FlightDescriptor descriptor, List<FlightEndpoint> endpoints, long bytes, long records) { FlightInfo(FlightGetInfo flightGetInfo) { - schema = flightGetInfo.getSchema().size() > 0 ? - Schema.deserialize(flightGetInfo.getSchema().asReadOnlyByteBuffer()) : new Schema(ImmutableList.of()); + try { + final ByteBuffer schemaBuf = flightGetInfo.getSchema().asReadOnlyByteBuffer(); + schema = flightGetInfo.getSchema().size() > 0 ?
+ MessageSerializer.deserializeSchema( + new ReadChannel(Channels.newChannel(new ByteBufferBackedInputStream(schemaBuf)))) + : new Schema(ImmutableList.of()); + } catch (IOException e) { + throw new RuntimeException(e); + } descriptor = new FlightDescriptor(flightGetInfo.getFlightDescriptor()); endpoints = flightGetInfo.getEndpointList().stream().map(t -> new FlightEndpoint(t)).collect(Collectors.toList()); bytes = flightGetInfo.getTotalBytes(); @@ -74,9 +90,16 @@ public List getEndpoints() { } FlightGetInfo toProtocol() { + // Encode schema in a Message payload + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try { + MessageSerializer.serialize(new WriteChannel(Channels.newChannel(baos)), schema); + } catch (IOException e) { + throw new RuntimeException(e); + } return Flight.FlightGetInfo.newBuilder() .addAllEndpoint(endpoints.stream().map(t -> t.toProtocol()).collect(Collectors.toList())) - .setSchema(ByteString.copyFrom(schema.toByteArray())) + .setSchema(ByteString.copyFrom(baos.toByteArray())) .setFlightDescriptor(descriptor.toProtocol()) .setTotalBytes(FlightInfo.this.bytes) .setTotalRecords(records) diff --git a/java/flight/src/main/java/org/apache/arrow/flight/FlightService.java b/java/flight/src/main/java/org/apache/arrow/flight/FlightService.java index 91499123134c3..389497e884d09 100644 --- a/java/flight/src/main/java/org/apache/arrow/flight/FlightService.java +++ b/java/flight/src/main/java/org/apache/arrow/flight/FlightService.java @@ -128,7 +128,8 @@ public boolean isCancelled() { @Override public void start(VectorSchemaRoot root) { responseObserver.onNext(new ArrowMessage(null, root.getSchema())); - unloader = new VectorUnloader(root, true, false); + // [ARROW-4213] We must align buffers to be compatible with other languages. + unloader = new VectorUnloader(root, true, true); } @Override diff --git a/java/flight/src/main/java/org/apache/arrow/flight/FlightStream.java b/java/flight/src/main/java/org/apache/arrow/flight/FlightStream.java index 5cba7ab47aa30..616b9cdc267a5 100644 --- a/java/flight/src/main/java/org/apache/arrow/flight/FlightStream.java +++ b/java/flight/src/main/java/org/apache/arrow/flight/FlightStream.java @@ -178,6 +178,7 @@ public void onNext(ArrowMessage msg) { public void onError(Throwable t) { ex = t; queue.add(DONE_EX); + root.setException(t); } @Override diff --git a/java/flight/src/main/java/org/apache/arrow/flight/example/integration/IntegrationTestClient.java b/java/flight/src/main/java/org/apache/arrow/flight/example/integration/IntegrationTestClient.java new file mode 100644 index 0000000000000..803a56c6c1afe --- /dev/null +++ b/java/flight/src/main/java/org/apache/arrow/flight/example/integration/IntegrationTestClient.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.flight.example.integration; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.List; + +import org.apache.arrow.flight.FlightClient; +import org.apache.arrow.flight.FlightDescriptor; +import org.apache.arrow.flight.FlightEndpoint; +import org.apache.arrow.flight.FlightInfo; +import org.apache.arrow.flight.FlightStream; +import org.apache.arrow.flight.Location; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.dictionary.DictionaryProvider; +import org.apache.arrow.vector.ipc.ArrowFileWriter; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; + +/** + * An example Flight client used by the integration tests; it fetches a stream from a Flight server and writes it to an Arrow file. + */ +class IntegrationTestClient { + private static final org.slf4j.Logger LOGGER = org.slf4j.LoggerFactory.getLogger(IntegrationTestClient.class); + private final Options options; + + private IntegrationTestClient() { + options = new Options(); + options.addOption("a", "arrow", true, "arrow file"); + options.addOption("j", "json", true, "json file"); + options.addOption("host", true, "The host to connect to."); + options.addOption("port", true, "The port to connect to."); + } + + public static void main(String[] args) { + try { + new IntegrationTestClient().run(args); + } catch (ParseException e) { + fatalError("Invalid parameters", e); + } catch (IOException e) { + fatalError("Error accessing files", e); + } + } + + static void fatalError(String message, Throwable e) { + System.err.println(message); + System.err.println(e.getMessage()); + LOGGER.error(message, e); + System.exit(1); + } + + private void run(String[] args) throws ParseException, IOException { + CommandLineParser parser = new DefaultParser(); + CommandLine cmd = parser.parse(options, args, false); + + String fileName = cmd.getOptionValue("arrow"); + if (fileName == null) { + throw new IllegalArgumentException("missing arrow file parameter"); + } + File arrowFile = new File(fileName); + if (arrowFile.exists()) { + throw new IllegalArgumentException("arrow file already exists: " + arrowFile.getAbsolutePath()); + } + + final String host = cmd.getOptionValue("host", "localhost"); + final int port = Integer.parseInt(cmd.getOptionValue("port", "31337")); + + final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); + FlightClient client = new FlightClient(allocator, new Location(host, port)); + FlightInfo info = client.getInfo(FlightDescriptor.path(cmd.getOptionValue("json"))); + List<FlightEndpoint> endpoints = info.getEndpoints(); + if (endpoints.isEmpty()) { + throw new RuntimeException("No endpoints returned from Flight server."); + } + + FlightStream stream = client.getStream(endpoints.get(0).getTicket()); + try (VectorSchemaRoot root = stream.getRoot(); + FileOutputStream fileOutputStream = new FileOutputStream(arrowFile); + ArrowFileWriter arrowWriter = new ArrowFileWriter(root, new DictionaryProvider.MapDictionaryProvider(), + fileOutputStream.getChannel())) { + while (stream.next()) { + arrowWriter.writeBatch(); + } + } + } +} diff --git a/java/flight/src/main/java/org/apache/arrow/flight/example/integration/IntegrationTestServer.java
b/java/flight/src/main/java/org/apache/arrow/flight/example/integration/IntegrationTestServer.java new file mode 100644 index 0000000000000..7b45e53a149be --- /dev/null +++ b/java/flight/src/main/java/org/apache/arrow/flight/example/integration/IntegrationTestServer.java @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.flight.example.integration; + +import java.io.File; +import java.nio.charset.StandardCharsets; +import java.util.Collections; +import java.util.concurrent.Callable; + +import org.apache.arrow.flight.Action; +import org.apache.arrow.flight.ActionType; +import org.apache.arrow.flight.Criteria; +import org.apache.arrow.flight.FlightDescriptor; +import org.apache.arrow.flight.FlightEndpoint; +import org.apache.arrow.flight.FlightInfo; +import org.apache.arrow.flight.FlightProducer; +import org.apache.arrow.flight.FlightServer; +import org.apache.arrow.flight.FlightStream; +import org.apache.arrow.flight.Location; +import org.apache.arrow.flight.Result; +import org.apache.arrow.flight.Ticket; +import org.apache.arrow.flight.auth.ServerAuthHandler; +import org.apache.arrow.flight.impl.Flight; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.ipc.JsonFileReader; +import org.apache.arrow.vector.types.pojo.Schema; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; + +class IntegrationTestServer { + private final Options options; + + private IntegrationTestServer() { + options = new Options(); + options.addOption("port", true, "The port to serve on."); + } + + private void run(String[] args) throws Exception { + CommandLineParser parser = new DefaultParser(); + CommandLine cmd = parser.parse(options, args, false); + + final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); + final int port = Integer.parseInt(cmd.getOptionValue("port", "31337")); + try (final IntegrationFlightProducer producer = new IntegrationFlightProducer(allocator); + final FlightServer server = new FlightServer(allocator, port, producer, ServerAuthHandler.NO_OP)) { + server.start(); + // Print out message for integration test script + System.out.println("Server listening on localhost:" + server.getPort()); + while (true) { + Thread.sleep(30000); + } + } + } + + public static void main(String[] args) { + try { + new IntegrationTestServer().run(args); + } catch (ParseException e) { + IntegrationTestClient.fatalError("Error parsing arguments", e); + } catch (Exception e) { + 
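+      // Any other failure (server start-up, I/O, interrupted sleep) aborts the integration run.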
IntegrationTestClient.fatalError("Runtime error", e); + } + } + + static class IntegrationFlightProducer implements FlightProducer, AutoCloseable { + private final BufferAllocator allocator; + + IntegrationFlightProducer(BufferAllocator allocator) { + this.allocator = allocator; + } + + @Override + public void close() { + allocator.close(); + } + + @Override + public void getStream(Ticket ticket, ServerStreamListener listener) { + String path = new String(ticket.getBytes(), StandardCharsets.UTF_8); + File inputFile = new File(path); + try (JsonFileReader reader = new JsonFileReader(inputFile, allocator)) { + Schema schema = reader.start(); + try (VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + listener.start(root); + while (reader.read(root)) { + listener.putNext(); + } + listener.completed(); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void listFlights(Criteria criteria, StreamListener listener) { + listener.onCompleted(); + } + + @Override + public FlightInfo getFlightInfo(FlightDescriptor descriptor) { + if (descriptor.isCommand()) { + throw new UnsupportedOperationException("Commands not supported."); + } + if (descriptor.getPath().size() < 1) { + throw new IllegalArgumentException("Must provide a path."); + } + String path = descriptor.getPath().get(0); + File inputFile = new File(path); + try (JsonFileReader reader = new JsonFileReader(inputFile, allocator)) { + Schema schema = reader.start(); + return new FlightInfo(schema, descriptor, + Collections.singletonList(new FlightEndpoint(new Ticket(path.getBytes()), + new Location("localhost", 31338))), + 0, 0); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public Callable acceptPut(FlightStream flightStream) { + return null; + } + + @Override + public Result doAction(Action action) { + return null; + } + + @Override + public void listActions(StreamListener listener) { + listener.onCompleted(); + } + } +} diff --git a/java/flight/src/test/java/org/apache/arrow/flight/TestBackPressure.java b/java/flight/src/test/java/org/apache/arrow/flight/TestBackPressure.java index 6b23a40f29348..71c90d3a00d47 100644 --- a/java/flight/src/test/java/org/apache/arrow/flight/TestBackPressure.java +++ b/java/flight/src/test/java/org/apache/arrow/flight/TestBackPressure.java @@ -29,6 +29,7 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; import org.junit.Assert; +import org.junit.Ignore; import org.junit.Test; import com.google.common.collect.ImmutableList; @@ -78,6 +79,7 @@ public void ensureIndependentSteams() throws Exception { /** * Make sure that a stream doesn't go faster than the consumer is consuming. */ + @Ignore @Test public void ensureWaitUntilProceed() throws Exception { // request some values. diff --git a/java/flight/src/test/resources/logback.xml b/java/flight/src/test/resources/logback.xml index e409e92b6fae1..444b2ed6d8392 100644 --- a/java/flight/src/test/resources/logback.xml +++ b/java/flight/src/test/resources/logback.xml @@ -11,6 +11,7 @@ language governing permissions and limitations under the License. 
--> + true @@ -24,4 +25,4 @@ - \ No newline at end of file + diff --git a/java/format/pom.xml b/java/format/pom.xml index d5ccd5ff08be8..2c3dc03acab1d 100644 --- a/java/format/pom.xml +++ b/java/format/pom.xml @@ -15,7 +15,7 @@ arrow-java-root org.apache.arrow - 0.12.0-SNAPSHOT + 0.13.0-SNAPSHOT arrow-format diff --git a/java/gandiva/pom.xml b/java/gandiva/pom.xml index 39752e2d36913..a0901530a612d 100644 --- a/java/gandiva/pom.xml +++ b/java/gandiva/pom.xml @@ -16,7 +16,7 @@ org.apache.arrow arrow-java-root - 0.12.0-SNAPSHOT + 0.13.0-SNAPSHOT org.apache.arrow.gandiva @@ -29,7 +29,7 @@ 2.5.0 18.0 true - ../../cpp/debug + ../../cpp/debug/debug @@ -133,7 +133,7 @@ - ${gandiva.cpp.build.dir} + ${gandiva.cpp.build.dir}/../src/gandiva irhelpers.bc diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java index 96788b39e08ec..46deee95fa717 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java @@ -17,8 +17,6 @@ package org.apache.arrow.gandiva.evaluator; -import org.apache.arrow.gandiva.exceptions.GandivaException; - /** * Used to construct gandiva configuration objects. */ @@ -26,16 +24,6 @@ public class ConfigurationBuilder { private String byteCodeFilePath = ""; - private static volatile long defaultConfiguration = 0L; - - /** - * Ctor - ensure that gandiva is loaded. - * @throws GandivaException - if library cannot be loaded. - */ - public ConfigurationBuilder() throws GandivaException { - JniWrapper.getInstance(); - } - public ConfigurationBuilder withByteCodeFilePath(final String byteCodeFilePath) { this.byteCodeFilePath = byteCodeFilePath; return this; @@ -45,26 +33,6 @@ public String getByteCodeFilePath() { return byteCodeFilePath; } - /** - * Get the default configuration to invoke gandiva. - * @return default configuration - * @throws GandivaException if unable to get native builder instance. - */ - static long getDefaultConfiguration() throws GandivaException { - if (defaultConfiguration == 0L) { - synchronized (ConfigurationBuilder.class) { - if (defaultConfiguration == 0L) { - String defaultByteCodeFilePath = JniWrapper.getInstance().getByteCodeFilePath(); - - defaultConfiguration = new ConfigurationBuilder() - .withByteCodeFilePath(defaultByteCodeFilePath) - .buildConfigInstance(); - } - } - } - return defaultConfiguration; - } - public native long buildConfigInstance(); public native void releaseConfigInstance(long configId); diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtil.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtil.java new file mode 100644 index 0000000000000..37dd0f61056b0 --- /dev/null +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtil.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.gandiva.evaluator; + +import org.apache.arrow.vector.types.pojo.ArrowType.Decimal; + +/** + * Utility for computing the result type (precision and scale) of decimal arithmetic operations. + */ +public class DecimalTypeUtil { + + public enum OperationType { + ADD, + SUBTRACT, + MULTIPLY, + DIVIDE, + MOD + } + + private static final int MIN_ADJUSTED_SCALE = 6; + // The maximum precision representable by a 16-byte decimal + private static final int MAX_PRECISION = 38; + + /** + * Returns the result type of {@code operation} applied to the two operand types. If the exact + * result would need more than 38 digits, the precision is capped at 38 and the scale reduced, + * e.g. adding (38, 10) and (38, 38) yields (38, 9). + */ + public static Decimal getResultTypeForOperation(OperationType operation, Decimal operand1, Decimal operand2) { + int s1 = operand1.getScale(); + int s2 = operand2.getScale(); + int p1 = operand1.getPrecision(); + int p2 = operand2.getPrecision(); + int resultScale = 0; + int resultPrecision = 0; + switch (operation) { + case ADD: + case SUBTRACT: + resultScale = Math.max(s1, s2); + resultPrecision = resultScale + Math.max(p1 - s1, p2 - s2) + 1; + break; + case MULTIPLY: + resultScale = s1 + s2; + resultPrecision = p1 + p2 + 1; + break; + case DIVIDE: + resultScale = Math.max(MIN_ADJUSTED_SCALE, s1 + p2 + 1); + resultPrecision = p1 - s1 + s2 + resultScale; + break; + case MOD: + resultScale = Math.max(s1, s2); + resultPrecision = Math.min(p1 - s1, p2 - s2) + resultScale; + break; + default: + throw new RuntimeException("Unsupported operation: " + operation); + } + return adjustScaleIfNeeded(resultPrecision, resultScale); + } + + private static Decimal adjustScaleIfNeeded(int precision, int scale) { + if (precision > MAX_PRECISION) { + int minScale = Math.min(scale, MIN_ADJUSTED_SCALE); + int delta = precision - MAX_PRECISION; + precision = MAX_PRECISION; + scale = Math.max(scale - delta, minScale); + } + return new Decimal(precision, scale); + } + +} + diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistry.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistry.java index 9c41c1942e9b3..b9986791850a7 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistry.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistry.java @@ -70,7 +70,7 @@ public static ExpressionRegistry getInstance() throws GandivaException { synchronized (ExpressionRegistry.class) { if (INSTANCE == null) { // ensure library is setup.
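// (JniLoader.getInstance() below also extracts the native gandiva library from the jar and loads it on first use.)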
- JniWrapper.getInstance(); + JniLoader.getInstance(); Set<ArrowType> typesFromGandiva = getSupportedTypesFromGandiva(); Set<FunctionSignature> functionsFromGandiva = getSupportedFunctionsFromGandiva(); INSTANCE = new ExpressionRegistry(typesFromGandiva, functionsFromGandiva); @@ -173,10 +173,11 @@ private static ArrowType getArrowType(ExtGandivaType type) { BIT_WIDTH_64); case GandivaType.NONE_VALUE: return new ArrowType.Null(); + case GandivaType.DECIMAL_VALUE: + return new ArrowType.Decimal(0, 0); case GandivaType.FIXED_SIZE_BINARY_VALUE: case GandivaType.MAP_VALUE: case GandivaType.INTERVAL_VALUE: - case GandivaType.DECIMAL_VALUE: case GandivaType.DICTIONARY_VALUE: case GandivaType.LIST_VALUE: case GandivaType.STRUCT_VALUE: diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Filter.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Filter.java index 25904d3dc1d76..4e9abedadf0f5 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Filter.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Filter.java @@ -43,11 +43,13 @@ public class Filter { private static final Logger logger = LoggerFactory.getLogger(Filter.class); + private final JniWrapper wrapper; private final long moduleId; private final Schema schema; private boolean closed; - private Filter(long moduleId, Schema schema) { + private Filter(JniWrapper wrapper, long moduleId, Schema schema) { + this.wrapper = wrapper; this.moduleId = moduleId; this.schema = schema; this.closed = false; } @@ -63,7 +65,7 @@ private Filter(long moduleId, Schema schema) { * @return A native filter object that can be used to invoke on a RecordBatch */ public static Filter make(Schema schema, Condition condition) throws GandivaException { - return make(schema, condition, ConfigurationBuilder.getDefaultConfiguration()); + return make(schema, condition, JniLoader.getDefaultConfiguration()); } /** @@ -81,11 +83,11 @@ public static Filter make(Schema schema, Condition condition, long configuration // Invoke the JNI layer to create the LLVM module representing the filter.
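    // The returned moduleId is an opaque handle to native state; it must be released via Filter#close().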
GandivaTypes.Condition conditionBuf = condition.toProtobuf(); GandivaTypes.Schema schemaBuf = ArrowTypeHelper.arrowSchemaToProtobuf(schema); - JniWrapper gandivaBridge = JniWrapper.getInstance(); - long moduleId = gandivaBridge.buildFilter(schemaBuf.toByteArray(), + JniWrapper wrapper = JniLoader.getInstance().getWrapper(); + long moduleId = wrapper.buildFilter(schemaBuf.toByteArray(), conditionBuf.toByteArray(), configurationId); - logger.info("Created module for the projector with id {}", moduleId); - return new Filter(moduleId, schema); + logger.debug("Created module for the filter with id {}", moduleId); + return new Filter(wrapper, moduleId, schema); } /** @@ -144,7 +146,7 @@ private void evaluate(int numRows, List buffers, List buf bufSizes[idx++] = bufLayout.getSize(); } - int numRecords = JniWrapper.getInstance().evaluateFilter(this.moduleId, numRows, + int numRecords = wrapper.evaluateFilter(this.moduleId, numRows, bufAddrs, bufSizes, selectionVector.getType().getNumber(), selectionVector.getBuffer().memoryAddress(), selectionVector.getBuffer().capacity()); @@ -161,7 +163,7 @@ public void close() throws GandivaException { return; } - JniWrapper.getInstance().closeFilter(this.moduleId); + wrapper.closeFilter(this.moduleId); this.closed = true; } } diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java new file mode 100644 index 0000000000000..ccb5307049460 --- /dev/null +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.gandiva.evaluator; + +import static java.util.UUID.randomUUID; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; + +import org.apache.arrow.gandiva.exceptions.GandivaException; + +/** + * This class handles loading of the jni library, and acts as a bridge for the native functions. 
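+ * The shared library and the IR bytecode file are extracted from the jar into
+ * java.io.tmpdir under randomized file names, so multiple processes using the
+ * gandiva jar can each load their own copy.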
+ */ +class JniLoader { + private static final String LIBRARY_NAME = "gandiva_jni"; + private static final String IRHELPERS_BC = "irhelpers.bc"; + + private static volatile JniLoader INSTANCE; + private static volatile long defaultConfiguration = 0L; + + private final String byteCodeFilePath; + private final JniWrapper wrapper; + + private JniLoader(String byteCodeFilePath) { + this.byteCodeFilePath = byteCodeFilePath; + this.wrapper = new JniWrapper(); + } + + static JniLoader getInstance() throws GandivaException { + if (INSTANCE == null) { + synchronized (JniLoader.class) { + if (INSTANCE == null) { + INSTANCE = setupInstance(); + } + } + } + return INSTANCE; + } + + private static JniLoader setupInstance() throws GandivaException { + try { + String tempDir = System.getProperty("java.io.tmpdir"); + loadGandivaLibraryFromJar(tempDir); + File byteCodeFile = moveFileFromJarToTemp(tempDir, IRHELPERS_BC); + return new JniLoader(byteCodeFile.getAbsolutePath()); + } catch (IOException ioException) { + throw new GandivaException("unable to create native instance", ioException); + } + } + + private static void loadGandivaLibraryFromJar(final String tmpDir) + throws IOException, GandivaException { + final String libraryToLoad = System.mapLibraryName(LIBRARY_NAME); + final File libraryFile = moveFileFromJarToTemp(tmpDir, libraryToLoad); + System.load(libraryFile.getAbsolutePath()); + } + + + private static File moveFileFromJarToTemp(final String tmpDir, String libraryToLoad) + throws IOException, GandivaException { + final File temp = setupFile(tmpDir, libraryToLoad); + try (final InputStream is = JniLoader.class.getClassLoader() + .getResourceAsStream(libraryToLoad)) { + if (is == null) { + throw new GandivaException(libraryToLoad + " was not found inside JAR."); + } else { + Files.copy(is, temp.toPath(), StandardCopyOption.REPLACE_EXISTING); + } + } + return temp; + } + + private static File setupFile(String tmpDir, String libraryToLoad) + throws IOException, GandivaException { + // accommodate multiple processes running with gandiva jar. + // length should be ok since uuid is only 36 characters. + final String randomizeFileName = libraryToLoad + randomUUID(); + final File temp = new File(tmpDir, randomizeFileName); + if (temp.exists() && !temp.delete()) { + throw new GandivaException("File: " + temp.getAbsolutePath() + + " already exists and cannot be removed."); + } + if (!temp.createNewFile()) { + throw new GandivaException("File: " + temp.getAbsolutePath() + + " could not be created."); + } + temp.deleteOnExit(); + return temp; + } + + /** + * Returns the byte code file path extracted from jar. + */ + public String getByteCodeFilePath() { + return byteCodeFilePath; + } + + /** + * Returns the jni wrapper. + */ + JniWrapper getWrapper() throws GandivaException { + return wrapper; + } + + /** + * Get the default configuration to invoke gandiva. + * @return default configuration + * @throws GandivaException if unable to get native builder instance. 
+ */ + static long getDefaultConfiguration() throws GandivaException { + if (defaultConfiguration == 0L) { + synchronized (ConfigurationBuilder.class) { + if (defaultConfiguration == 0L) { + String defaultByteCodeFilePath = JniLoader.getInstance().getByteCodeFilePath(); + + defaultConfiguration = new ConfigurationBuilder() + .withByteCodeFilePath(defaultByteCodeFilePath) + .buildConfigInstance(); + } + } + } + return defaultConfiguration; + } +} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java index eea42f6976ce4..f00b0fbb9151a 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java @@ -17,100 +17,15 @@ package org.apache.arrow.gandiva.evaluator; -import static java.util.UUID.randomUUID; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.nio.file.Files; -import java.nio.file.StandardCopyOption; - import org.apache.arrow.gandiva.exceptions.GandivaException; /** * This class is implemented in JNI. This provides the Java interface - * to invoke functions in JNI + * to invoke functions in JNI. + * This file is used to generate the .h files required for jni. Avoid all + * external dependencies in this file. */ -class JniWrapper { - private static final String LIBRARY_NAME = "gandiva_jni"; - private static final String IRHELPERS_BC = "irhelpers.bc"; - - private static volatile JniWrapper INSTANCE; - - private final String byteCodeFilePath; - - private JniWrapper(String byteCodeFilePath) { - this.byteCodeFilePath = byteCodeFilePath; - } - - static JniWrapper getInstance() throws GandivaException { - if (INSTANCE == null) { - synchronized (JniWrapper.class) { - if (INSTANCE == null) { - INSTANCE = setupInstance(); - } - } - } - return INSTANCE; - } - - private static JniWrapper setupInstance() throws GandivaException { - try { - String tempDir = System.getProperty("java.io.tmpdir"); - loadGandivaLibraryFromJar(tempDir); - File byteCodeFile = moveFileFromJarToTemp(tempDir, IRHELPERS_BC); - return new JniWrapper(byteCodeFile.getAbsolutePath()); - } catch (IOException ioException) { - throw new GandivaException("unable to create native instance", ioException); - } - } - - private static void loadGandivaLibraryFromJar(final String tmpDir) - throws IOException, GandivaException { - final String libraryToLoad = System.mapLibraryName(LIBRARY_NAME); - final File libraryFile = moveFileFromJarToTemp(tmpDir, libraryToLoad); - System.load(libraryFile.getAbsolutePath()); - } - - - private static File moveFileFromJarToTemp(final String tmpDir, String libraryToLoad) - throws IOException, GandivaException { - final File temp = setupFile(tmpDir, libraryToLoad); - try (final InputStream is = JniWrapper.class.getClassLoader() - .getResourceAsStream(libraryToLoad)) { - if (is == null) { - throw new GandivaException(libraryToLoad + " was not found inside JAR."); - } else { - Files.copy(is, temp.toPath(), StandardCopyOption.REPLACE_EXISTING); - } - } - return temp; - } - - private static File setupFile(String tmpDir, String libraryToLoad) - throws IOException, GandivaException { - // accommodate multiple processes running with gandiva jar. - // length should be ok since uuid is only 36 characters.
- final String randomizeFileName = libraryToLoad + randomUUID(); - final File temp = new File(tmpDir, randomizeFileName); - if (temp.exists() && !temp.delete()) { - throw new GandivaException("File: " + temp.getAbsolutePath() + - " already exists and cannot be removed."); - } - if (!temp.createNewFile()) { - throw new GandivaException("File: " + temp.getAbsolutePath() + - " could not be created."); - } - temp.deleteOnExit(); - return temp; - } - - /** - * Returns the byte code file path extracted from jar. - */ - public String getByteCodeFilePath() { - return byteCodeFilePath; - } +public class JniWrapper { /** * Generates the projector module to evaluate the expressions with diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java index d7578936b3d83..d13195ca4d391 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java @@ -46,12 +46,14 @@ public class Projector { private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(Projector.class); + private JniWrapper wrapper; private final long moduleId; private final Schema schema; private final int numExprs; private boolean closed; - private Projector(long moduleId, Schema schema, int numExprs) { + private Projector(JniWrapper wrapper, long moduleId, Schema schema, int numExprs) { + this.wrapper = wrapper; this.moduleId = moduleId; this.schema = schema; this.numExprs = numExprs; @@ -71,7 +73,7 @@ private Projector(long moduleId, Schema schema, int numExprs) { */ public static Projector make(Schema schema, List exprs) throws GandivaException { - return make(schema, exprs, ConfigurationBuilder.getDefaultConfiguration()); + return make(schema, exprs, JniLoader.getDefaultConfiguration()); } /** @@ -96,11 +98,11 @@ public static Projector make(Schema schema, List exprs, long // Invoke the JNI layer to create the LLVM module representing the expressions GandivaTypes.Schema schemaBuf = ArrowTypeHelper.arrowSchemaToProtobuf(schema); - JniWrapper gandivaBridge = JniWrapper.getInstance(); - long moduleId = gandivaBridge.buildProjector(schemaBuf.toByteArray(), builder.build() - .toByteArray(), configurationId); - logger.info("Created module for the projector with id {}", moduleId); - return new Projector(moduleId, schema, exprs.size()); + JniWrapper wrapper = JniLoader.getInstance().getWrapper(); + long moduleId = wrapper.buildProjector(schemaBuf.toByteArray(), + builder.build().toByteArray(), configurationId); + logger.debug("Created module for the projector with id {}", moduleId); + return new Projector(wrapper, moduleId, schema, exprs.size()); } /** @@ -175,9 +177,7 @@ private void evaluate(int numRows, List buffers, List buf valueVector.setValueCount(numRows); } - JniWrapper.getInstance().evaluateProjector(this.moduleId, numRows, - bufAddrs, bufSizes, - outAddrs, outSizes); + wrapper.evaluateProjector(this.moduleId, numRows, bufAddrs, bufSizes, outAddrs, outSizes); } /** @@ -188,7 +188,7 @@ public void close() throws GandivaException { return; } - JniWrapper.getInstance().closeProjector(this.moduleId); + wrapper.closeProjector(this.moduleId); this.closed = true; } } diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/DecimalNode.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/DecimalNode.java new file mode 100644 index 0000000000000..1b908b9962fb3 --- 
/dev/null +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/DecimalNode.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.gandiva.expression; + +import org.apache.arrow.gandiva.exceptions.GandivaException; +import org.apache.arrow.gandiva.ipc.GandivaTypes; + +/** + * Represents a decimal constant in an expression tree, e.g. the literal 5.0 + * in the expression (x + 5.0). + */ +class DecimalNode implements TreeNode { + private final String value; + private final int precision; + private final int scale; + + DecimalNode(String value, int precision, int scale) { + this.value = value; + this.precision = precision; + this.scale = scale; + } + + @Override + public GandivaTypes.TreeNode toProtobuf() throws GandivaException { + GandivaTypes.DecimalNode.Builder decimalNode = GandivaTypes.DecimalNode.newBuilder(); + decimalNode.setValue(value); + decimalNode.setPrecision(precision); + decimalNode.setScale(scale); + + GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder(); + builder.setDecimalNode(decimalNode.build()); + return builder.build(); + } +} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java index f5568591c2002..a220c547e44a6 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java @@ -55,6 +55,10 @@ public static TreeNode makeBinaryLiteral(byte[] binaryConstant) { return new BinaryNode(binaryConstant); } + /** + * create a decimal literal. + */ + public static TreeNode makeDecimalLiteral(String decimalConstant, int precision, int scale) { + return new DecimalNode(decimalConstant, precision, scale); + } + /** * create a null literal.
*/ diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/BaseEvaluatorTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/BaseEvaluatorTest.java index aeb3d418a70ac..97c2883c58e5e 100644 --- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/BaseEvaluatorTest.java +++ b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/BaseEvaluatorTest.java @@ -17,6 +17,8 @@ package org.apache.arrow.gandiva.evaluator; +import java.math.BigDecimal; +import java.math.BigInteger; import java.util.ArrayList; import java.util.List; import java.util.Random; @@ -27,6 +29,7 @@ import org.apache.arrow.gandiva.expression.ExpressionTree; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.DecimalVector; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.ipc.message.ArrowFieldNode; @@ -229,6 +232,18 @@ ArrowBuf intBuf(int[] ints) { return buffer; } + DecimalVector decimalVector(String[] values, int precision, int scale) { + DecimalVector vector = new DecimalVector("decimal" + Math.random(), allocator, precision, scale); + vector.allocateNew(); + for (int i = 0; i < values.length; i++) { + BigDecimal decimal = new BigDecimal(values[i]); + vector.setSafe(i, decimal); + } + + vector.setValueCount(values.length); + return vector; + } + ArrowBuf longBuf(long[] longs) { ArrowBuf buffer = allocator.buffer(longs.length * 8); for (int i = 0; i < longs.length; i++) { diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtilTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtilTest.java new file mode 100644 index 0000000000000..4a4fb82951c16 --- /dev/null +++ b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtilTest.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.gandiva.evaluator; + +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.junit.Assert; +import org.junit.Test; + +public class DecimalTypeUtilTest { + + @Test + public void testOutputTypesForAdd() { + ArrowType.Decimal operand1 = getDecimal(30, 10); + ArrowType.Decimal operand2 = getDecimal(30, 10); + ArrowType.Decimal resultType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil + .OperationType.ADD, operand1, operand2); + Assert.assertTrue(getDecimal(31, 10).equals(resultType)); + + operand1 = getDecimal(30, 6); + operand2 = getDecimal(30, 5); + resultType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil + .OperationType.ADD, operand1, operand2); + Assert.assertTrue(getDecimal(32, 6).equals(resultType)); + + operand1 = getDecimal(30, 10); + operand2 = getDecimal(38, 10); + resultType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil + .OperationType.ADD, operand1, operand2); + Assert.assertTrue(getDecimal(38, 9).equals(resultType)); + + operand1 = getDecimal(38, 10); + operand2 = getDecimal(38, 38); + resultType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil + .OperationType.ADD, operand1, operand2); + Assert.assertTrue(getDecimal(38, 9).equals(resultType)); + + operand1 = getDecimal(38, 10); + operand2 = getDecimal(38, 2); + resultType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil + .OperationType.ADD, operand1, operand2); + Assert.assertTrue(getDecimal(38, 6).equals(resultType)); + + } + + @Test + public void testOutputTypesForMultiply() { + ArrowType.Decimal operand1 = getDecimal(30, 10); + ArrowType.Decimal operand2 = getDecimal(30, 10); + ArrowType.Decimal resultType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil + .OperationType.MULTIPLY, operand1, operand2); + Assert.assertTrue(getDecimal(38, 6).equals(resultType)); + + operand1 = getDecimal(38, 10); + operand2 = getDecimal(9, 2); + resultType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil + .OperationType.MULTIPLY, operand1, operand2); + Assert.assertTrue(getDecimal(38, 6).equals(resultType)); + + } + + @Test + public void testOutputTypesForMod() { + ArrowType.Decimal operand1 = getDecimal(30, 10); + ArrowType.Decimal operand2 = getDecimal(28 , 7); + ArrowType.Decimal resultType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil + .OperationType.MOD, operand1, operand2); + Assert.assertTrue(getDecimal(30, 10).equals(resultType)); + } + + private ArrowType.Decimal getDecimal(int precision, int scale) { + return new ArrowType.Decimal(precision, scale); + } + +} diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/MicroBenchmarkTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/MicroBenchmarkTest.java index cd297034df80f..6934c3f9e7d1a 100644 --- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/MicroBenchmarkTest.java +++ b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/MicroBenchmarkTest.java @@ -26,10 +26,12 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; import org.junit.Assert; +import org.junit.Ignore; import org.junit.Test; import com.google.common.collect.Lists; +@Ignore public class MicroBenchmarkTest extends BaseEvaluatorTest { private double toleranceRatio = 4.0; @@ -58,7 +60,7 @@ public void testAdd3() throws Exception { 1 * MILLION, 16 * THOUSAND, 4); System.out.println("Time taken for projecting 1m records of add3 is " + timeTaken + "ms"); - 
Assert.assertTrue(timeTaken <= 10 * toleranceRatio); + Assert.assertTrue(timeTaken <= 13 * toleranceRatio); } @Test diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorDecimalTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorDecimalTest.java new file mode 100644 index 0000000000000..a3a0b4818ac22 --- /dev/null +++ b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorDecimalTest.java @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.gandiva.evaluator; + + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.math.BigDecimal; +import java.util.ArrayList; +import java.util.List; + +import org.apache.arrow.gandiva.exceptions.GandivaException; +import org.apache.arrow.gandiva.expression.ExpressionTree; +import org.apache.arrow.gandiva.expression.TreeBuilder; +import org.apache.arrow.gandiva.expression.TreeNode; +import org.apache.arrow.vector.DecimalVector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.ipc.message.ArrowFieldNode; +import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; +import org.junit.Test; + +import com.google.common.collect.Lists; + +public class ProjectorDecimalTest extends org.apache.arrow.gandiva.evaluator.BaseEvaluatorTest { + + @Test + public void test_add() throws GandivaException { + int precision = 38; + int scale = 8; + ArrowType.Decimal decimal = new ArrowType.Decimal(precision, scale); + Field a = Field.nullable("a", decimal); + Field b = Field.nullable("b", decimal); + List args = Lists.newArrayList(a, b); + + ArrowType.Decimal outputType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil + .OperationType.ADD, decimal, decimal); + Field retType = Field.nullable("c", outputType); + ExpressionTree root = TreeBuilder.makeExpression("add", args, retType); + + List exprs = Lists.newArrayList(root); + + Schema schema = new Schema(args); + Projector eval = Projector.make(schema, exprs); + + int numRows = 4; + byte[] validity = new byte[]{(byte) 255}; + String[] aValues = new String[]{"1.12345678","2.12345678","3.12345678","4.12345678"}; + String[] bValues = new String[]{"2.12345678","3.12345678","4.12345678","5.12345678"}; + + DecimalVector valuesa = decimalVector(aValues, precision, scale); + DecimalVector valuesb = decimalVector(bValues, precision, scale); + ArrowRecordBatch batch = + new ArrowRecordBatch( + numRows, + Lists.newArrayList(new ArrowFieldNode(numRows, 0), new ArrowFieldNode(numRows, 0)), + 
Lists.newArrayList(valuesa.getValidityBuffer(), valuesa.getDataBuffer(), + valuesb.getValidityBuffer(), valuesb.getDataBuffer())); + + DecimalVector outVector = new DecimalVector("decimal_output", allocator, outputType.getPrecision(), + outputType.getScale()); + outVector.allocateNew(numRows); + + List output = new ArrayList(); + output.add(outVector); + eval.evaluate(batch, output); + + // should have scaled down. + BigDecimal[] expOutput = new BigDecimal[]{BigDecimal.valueOf(3.2469136), + BigDecimal.valueOf(5.2469136), + BigDecimal.valueOf(7.2469136), + BigDecimal.valueOf(9.2469136)}; + + for (int i = 0; i < 4; i++) { + assertFalse(outVector.isNull(i)); + assertTrue("index : " + i + " failed compare", expOutput[i].compareTo(outVector.getObject(i) + ) == 0); + } + + // free buffers + releaseRecordBatch(batch); + releaseValueVectors(output); + eval.close(); + } + + @Test + public void test_add_literal() throws GandivaException { + int precision = 2; + int scale = 0; + ArrowType.Decimal decimal = new ArrowType.Decimal(precision, scale); + ArrowType.Decimal literalType = new ArrowType.Decimal(2, 1); + Field a = Field.nullable("a", decimal); + + ArrowType.Decimal outputType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil + .OperationType.ADD, decimal, literalType); + Field retType = Field.nullable("c", outputType); + TreeNode field = TreeBuilder.makeField(a); + TreeNode literal = TreeBuilder.makeDecimalLiteral("6", 2, 1); + List args = Lists.newArrayList(field, literal); + TreeNode root = TreeBuilder.makeFunction("add", args, outputType); + ExpressionTree tree = TreeBuilder.makeExpression(root, retType); + + List exprs = Lists.newArrayList(tree); + + Schema schema = new Schema(Lists.newArrayList(a)); + Projector eval = Projector.make(schema, exprs); + + int numRows = 4; + String[] aValues = new String[]{"1", "2", "3", "4"}; + + DecimalVector valuesa = decimalVector(aValues, precision, scale); + ArrowRecordBatch batch = + new ArrowRecordBatch( + numRows, + Lists.newArrayList(new ArrowFieldNode(numRows, 0)), + Lists.newArrayList(valuesa.getValidityBuffer(), valuesa.getDataBuffer())); + + DecimalVector outVector = new DecimalVector("decimal_output", allocator, outputType.getPrecision(), + outputType.getScale()); + outVector.allocateNew(numRows); + + List output = new ArrayList(); + output.add(outVector); + eval.evaluate(batch, output); + + BigDecimal[] expOutput = new BigDecimal[]{BigDecimal.valueOf(1.6), BigDecimal.valueOf(2.6), + BigDecimal.valueOf(3.6), BigDecimal.valueOf(4.6)}; + + for (int i = 0; i < 4; i++) { + assertFalse(outVector.isNull(i)); + assertTrue(expOutput[i].compareTo(outVector.getObject(i)) == 0); + } + + // free buffers + releaseRecordBatch(batch); + releaseValueVectors(output); + eval.close(); + } +} diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java index 7c56f21741db2..d333556d2d7d2 100644 --- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java +++ b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java @@ -131,6 +131,8 @@ public void testMakeProjectorParallel() throws GandivaException, InterruptedExce executors.awaitTermination(100, java.util.concurrent.TimeUnit.SECONDS); } + // Will be fixed by https://issues.apache.org/jira/browse/ARROW-4371 + @Ignore @Test public void testMakeProjector() throws GandivaException { Field a = Field.nullable("a", int64); diff --git 
a/java/gandiva/src/test/resources/logback.xml b/java/gandiva/src/test/resources/logback.xml index 9e23b555feec3..f9e449fa67b2e 100644 --- a/java/gandiva/src/test/resources/logback.xml +++ b/java/gandiva/src/test/resources/logback.xml @@ -11,9 +11,18 @@ language governing permissions and limitations under the License. --> + + + + %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + + + + - + diff --git a/java/memory/pom.xml b/java/memory/pom.xml index 7e2c42d997ee1..b882382e6f0b8 100644 --- a/java/memory/pom.xml +++ b/java/memory/pom.xml @@ -14,7 +14,7 @@ org.apache.arrow arrow-java-root - 0.12.0-SNAPSHOT + 0.13.0-SNAPSHOT arrow-memory Arrow Memory diff --git a/java/memory/src/main/java/io/netty/buffer/ArrowBuf.java b/java/memory/src/main/java/io/netty/buffer/ArrowBuf.java index bfe97015f1218..5d37df59acfb1 100644 --- a/java/memory/src/main/java/io/netty/buffer/ArrowBuf.java +++ b/java/memory/src/main/java/io/netty/buffer/ArrowBuf.java @@ -791,7 +791,7 @@ protected short _getShort(int index) { } /** - * @see {@link #getShortLE(int)}. + * @see ArrowBuf#getShortLE(int). */ @Override protected short _getShortLE(int index) { @@ -804,7 +804,7 @@ protected int _getInt(int index) { } /** - * @see {@link #getIntLE(int)}. + * @see ArrowBuf#getIntLE(int). */ @Override protected int _getIntLE(int index) { @@ -812,7 +812,7 @@ protected int _getIntLE(int index) { } /** - * @see {@link #getUnsignedMedium(int)}. + * @see ArrowBuf#getUnsignedMedium(int). */ @Override protected int _getUnsignedMedium(int index) { @@ -820,7 +820,7 @@ protected int _getUnsignedMedium(int index) { } /** - * @see {@link #getUnsignedMediumLE(int)}. + * @see ArrowBuf#getUnsignedMediumLE(int). */ @Override protected int _getUnsignedMediumLE(int index) { @@ -833,7 +833,7 @@ protected long _getLong(int index) { } /** - * @see {@link #getLongLE(int)}. + * @see ArrowBuf#getLongLE(int). */ @Override protected long _getLongLE(int index) { @@ -851,7 +851,7 @@ protected void _setShort(int index, int value) { } /** - * @see {@link #setShortLE(int, int)}. + * @see ArrowBuf#setShortLE(int, int). */ @Override protected void _setShortLE(int index, int value) { @@ -864,7 +864,7 @@ protected void _setMedium(int index, int value) { } /** - * @see {@link #setMediumLE(int, int)}. + * @see ArrowBuf#setMediumLE(int, int). */ @Override protected void _setMediumLE(int index, int value) { @@ -877,7 +877,7 @@ protected void _setInt(int index, int value) { } /** - * @see {@link #setIntLE(int, int)}. + * @see ArrowBuf#setIntLE(int, int). */ @Override protected void _setIntLE(int index, int value) { @@ -890,7 +890,7 @@ protected void _setLong(int index, long value) { } /** - * @see {@link #setLongLE(int, long)}. + * @see ArrowBuf#setLongLE(int, long). 
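(Review note: the Javadoc cleanups in this file and in AutoCloseables further down follow one rule: the block tag @see takes a plain reference, while inline cross-references use {@link}; nesting {@link} inside @see, or using {@see} as an inline tag, is flagged by doclint. The corrected forms:

    /**
     * @see ArrowBuf#getShortLE(int)
     */
    // and, inline in a sentence: closes an {@link java.lang.AutoCloseable} safely
)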
*/ @Override public void _setLongLE(int index, long value) { diff --git a/java/memory/src/main/java/org/apache/arrow/memory/AllocationManager.java b/java/memory/src/main/java/org/apache/arrow/memory/AllocationManager.java index aaa1f506fb5c2..c10d246013290 100644 --- a/java/memory/src/main/java/org/apache/arrow/memory/AllocationManager.java +++ b/java/memory/src/main/java/org/apache/arrow/memory/AllocationManager.java @@ -22,11 +22,8 @@ import java.util.IdentityHashMap; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.locks.ReadWriteLock; -import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.arrow.memory.BaseAllocator.Verbosity; -import org.apache.arrow.memory.util.AutoCloseableLock; import org.apache.arrow.memory.util.HistoricalLog; import org.apache.arrow.util.Preconditions; @@ -73,9 +70,6 @@ public class AllocationManager { // ARROW-1627 Trying to minimize memory overhead caused by previously used IdentityHashMap // see JIRA for details private final LowCostIdentityHashMap map = new LowCostIdentityHashMap<>(); - private final ReadWriteLock lock = new ReentrantReadWriteLock(); - private final AutoCloseableLock readLock = new AutoCloseableLock(lock.readLock()); - private final AutoCloseableLock writeLock = new AutoCloseableLock(lock.writeLock()); private final long amCreationTime = System.nanoTime(); private volatile BufferLedger owningLedger; @@ -115,9 +109,8 @@ private BufferLedger associate(final BaseAllocator allocator, final boolean reta "A buffer can only be associated between two allocators that share the same root."); } - try (AutoCloseableLock read = readLock.open()) { - - final BufferLedger ledger = map.get(allocator); + synchronized (this) { + BufferLedger ledger = map.get(allocator); if (ledger != null) { if (retain) { ledger.inc(); @@ -125,20 +118,7 @@ private BufferLedger associate(final BaseAllocator allocator, final boolean reta return ledger; } - } - try (AutoCloseableLock write = writeLock.open()) { - // we have to recheck existing ledger since a second reader => writer could be competing - // with us. - - final BufferLedger existingLedger = map.get(allocator); - if (existingLedger != null) { - if (retain) { - existingLedger.inc(); - } - return existingLedger; - } - - final BufferLedger ledger = new BufferLedger(allocator); + ledger = new BufferLedger(allocator); if (retain) { ledger.inc(); } @@ -153,7 +133,7 @@ private BufferLedger associate(final BaseAllocator allocator, final boolean reta * The way that a particular BufferLedger communicates back to the AllocationManager that it * now longer needs to hold * a reference to particular piece of memory. - * Can only be called when you already hold the writeLock. + * Can only be called when you already hold the lock. */ private void release(final BufferLedger ledger) { final BaseAllocator allocator = ledger.getAllocator(); @@ -250,7 +230,7 @@ public boolean transferBalance(final BufferLedger target) { // since two balance transfers out from the allocator manager could cause incorrect // accounting, we need to ensure // that this won't happen by synchronizing on the allocator manager instance. 
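(Review note: on the AllocationManager change below: the old read-lock / write-lock pair needed a second map lookup after the lock upgrade, because a competing writer could create the ledger between the two critical sections; a single monitor makes check-and-create atomic. A sketch, with hypothetical lookup/store helpers standing in for the LowCostIdentityHashMap calls:

    private BufferLedger associate(final BaseAllocator allocator, final boolean retain) {
      synchronized (this) {
        BufferLedger ledger = lookup(allocator);   // hypothetical helper over the map
        if (ledger == null) {
          ledger = new BufferLedger(allocator);    // created under the same monitor
          store(allocator, ledger);                // hypothetical helper
        }
        if (retain) {
          ledger.inc();
        }
        return ledger;
      }
    }

The tradeoff is coarser locking on the read path in exchange for dropping the upgrade dance and the AutoCloseableLock machinery.)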
- try (AutoCloseableLock write = writeLock.open()) { + synchronized (AllocationManager.this) { if (owningLedger != this) { return true; } @@ -330,7 +310,7 @@ public int decrement(int decrement) { allocator.assertOpen(); final int outcome; - try (AutoCloseableLock write = writeLock.open()) { + synchronized (AllocationManager.this) { outcome = bufRefCnt.addAndGet(-decrement); if (outcome == 0) { lDestructionTime = System.nanoTime(); @@ -431,7 +411,7 @@ public int getSize() { * @return Amount of accounted(owned) memory associated with this ledger. */ public int getAccountedSize() { - try (AutoCloseableLock read = readLock.open()) { + synchronized (AllocationManager.this) { if (owningLedger == this) { return size; } else { diff --git a/java/memory/src/test/resources/logback.xml b/java/memory/src/test/resources/logback.xml new file mode 100644 index 0000000000000..4c54d18a210ff --- /dev/null +++ b/java/memory/src/test/resources/logback.xml @@ -0,0 +1,28 @@ + + + + + + + + + %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + diff --git a/java/plasma/pom.xml b/java/plasma/pom.xml index d50171a309155..8c51fa2554557 100644 --- a/java/plasma/pom.xml +++ b/java/plasma/pom.xml @@ -14,7 +14,7 @@ org.apache.arrow arrow-java-root - 0.12.0-SNAPSHOT + 0.13.0-SNAPSHOT arrow-plasma Arrow Plasma Client diff --git a/java/plasma/src/main/java/org/apache/arrow/plasma/ObjectStoreLink.java b/java/plasma/src/main/java/org/apache/arrow/plasma/ObjectStoreLink.java index 3b67bc08ecfdc..f933c85b8365f 100644 --- a/java/plasma/src/main/java/org/apache/arrow/plasma/ObjectStoreLink.java +++ b/java/plasma/src/main/java/org/apache/arrow/plasma/ObjectStoreLink.java @@ -19,6 +19,9 @@ import java.util.List; +import org.apache.arrow.plasma.exceptions.DuplicateObjectException; +import org.apache.arrow.plasma.exceptions.PlasmaOutOfMemoryException; + /** * Object store interface, which provides the capabilities to put and get raw byte array, and serves. */ @@ -42,7 +45,8 @@ class ObjectStoreData { * @param value The value to put in the object store. * @param metadata encodes whatever metadata the user wishes to encode. */ - void put(byte[] objectId, byte[] value, byte[] metadata); + void put(byte[] objectId, byte[] value, byte[] metadata) + throws DuplicateObjectException, PlasmaOutOfMemoryException; /** * Get a buffer from the PlasmaStore based on the objectId. @@ -79,16 +83,6 @@ default byte[] get(byte[] objectId, int timeoutMs, boolean isMetadata) { */ List get(byte[][] objectIds, int timeoutMs); - /** - * Wait until numReturns objects in objectIds are ready. - * - * @param objectIds List of object IDs to wait for. - * @param timeoutMs Return to the caller after timeoutMs milliseconds. - * @param numReturns We are waiting for this number of objects to be ready. - * @return List of object IDs that are ready - */ - List wait(byte[][] objectIds, int timeoutMs, int numReturns); - /** * Compute the hash of an object in the object store. * @@ -98,23 +92,6 @@ default byte[] get(byte[] objectId, int timeoutMs, boolean isMetadata) { */ byte[] hash(byte[] objectId); - /** - * Fetch the object with the given ID from other plasma manager instances. - * - * @param objectId The object ID used to identify the object. - */ - default void fetch(byte[] objectId) { - byte[][] objectIds = {objectId}; - fetch(objectIds); - } - - /** - * Fetch the objects with the given IDs from other plasma manager instances. - * - * @param objectIds List of object IDs used to identify the objects. 
- */ - void fetch(byte[][] objectIds); - /** * Evict some objects to recover given count of bytes. * diff --git a/java/plasma/src/main/java/org/apache/arrow/plasma/PlasmaClient.java b/java/plasma/src/main/java/org/apache/arrow/plasma/PlasmaClient.java index db1f35e1641bb..a708f41853d75 100644 --- a/java/plasma/src/main/java/org/apache/arrow/plasma/PlasmaClient.java +++ b/java/plasma/src/main/java/org/apache/arrow/plasma/PlasmaClient.java @@ -19,9 +19,10 @@ import java.nio.ByteBuffer; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; +import org.apache.arrow.plasma.exceptions.DuplicateObjectException; +import org.apache.arrow.plasma.exceptions.PlasmaOutOfMemoryException; /** * The PlasmaClient is used to interface with a plasma store and manager. @@ -45,18 +46,9 @@ public PlasmaClient(String storeSocketName, String managerSocketName, int releas // interface methods -------------------- @Override - public void put(byte[] objectId, byte[] value, byte[] metadata) { - ByteBuffer buf = null; - try { - buf = PlasmaClientJNI.create(conn, objectId, value.length, metadata); - } catch (Exception e) { - System.err.println("ObjectId " + objectId + " error at PlasmaClient put"); - e.printStackTrace(); - } - if (buf == null) { - return; - } - + public void put(byte[] objectId, byte[] value, byte[] metadata) + throws DuplicateObjectException, PlasmaOutOfMemoryException { + ByteBuffer buf = PlasmaClientJNI.create(conn, objectId, value.length, metadata); buf.put(value); PlasmaClientJNI.seal(conn, objectId); PlasmaClientJNI.release(conn, objectId); @@ -81,34 +73,11 @@ public List get(byte[][] objectIds, int timeoutMs, boolean isMetadata) { return ret; } - @Override - public List wait(byte[][] objectIds, int timeoutMs, int numReturns) { - byte[][] readys = PlasmaClientJNI.wait(conn, objectIds, timeoutMs, numReturns); - - List ret = new ArrayList<>(); - for (byte[] ready : readys) { - for (byte[] id : objectIds) { - if (Arrays.equals(ready, id)) { - ret.add(id); - break; - } - } - } - - assert (ret.size() == readys.length); - return ret; - } - @Override public byte[] hash(byte[] objectId) { return PlasmaClientJNI.hash(conn, objectId); } - @Override - public void fetch(byte[][] objectIds) { - PlasmaClientJNI.fetch(conn, objectIds); - } - @Override public List get(byte[][] objectIds, int timeoutMs) { ByteBuffer[][] bufs = PlasmaClientJNI.get(conn, objectIds, timeoutMs); diff --git a/java/plasma/src/main/java/org/apache/arrow/plasma/PlasmaClientJNI.java b/java/plasma/src/main/java/org/apache/arrow/plasma/PlasmaClientJNI.java index 4f7598eae2283..7f8cf8287e510 100644 --- a/java/plasma/src/main/java/org/apache/arrow/plasma/PlasmaClientJNI.java +++ b/java/plasma/src/main/java/org/apache/arrow/plasma/PlasmaClientJNI.java @@ -19,6 +19,9 @@ import java.nio.ByteBuffer; +import org.apache.arrow.plasma.exceptions.DuplicateObjectException; +import org.apache.arrow.plasma.exceptions.PlasmaOutOfMemoryException; + /** * JNI static methods for PlasmaClient. 
*/ @@ -28,7 +31,8 @@ public class PlasmaClientJNI { public static native void disconnect(long conn); - public static native ByteBuffer create(long conn, byte[] objectId, int size, byte[] metadata); + public static native ByteBuffer create(long conn, byte[] objectId, int size, byte[] metadata) + throws DuplicateObjectException, PlasmaOutOfMemoryException; public static native byte[] hash(long conn, byte[] objectId); diff --git a/java/plasma/src/main/java/org/apache/arrow/plasma/exceptions/DuplicateObjectException.java b/java/plasma/src/main/java/org/apache/arrow/plasma/exceptions/DuplicateObjectException.java new file mode 100644 index 0000000000000..464d54d6d5b18 --- /dev/null +++ b/java/plasma/src/main/java/org/apache/arrow/plasma/exceptions/DuplicateObjectException.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.plasma.exceptions; + +public class DuplicateObjectException extends RuntimeException { + + public DuplicateObjectException(String objectId) { + super("An object with ID " + objectId + " already exists in the plasma store."); + } + + public DuplicateObjectException(String objectId, Throwable t) { + super("An object with ID " + objectId + " already exists in the plasma store.", t); + } +} diff --git a/java/plasma/src/main/java/org/apache/arrow/plasma/exceptions/PlasmaOutOfMemoryException.java b/java/plasma/src/main/java/org/apache/arrow/plasma/exceptions/PlasmaOutOfMemoryException.java new file mode 100644 index 0000000000000..831a4caf62807 --- /dev/null +++ b/java/plasma/src/main/java/org/apache/arrow/plasma/exceptions/PlasmaOutOfMemoryException.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
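(Review note: with the throws clauses added above, a failed put() now surfaces as an exception instead of being printed and swallowed inside PlasmaClient. Both exception types extend RuntimeException (see the class bodies around this note), so catching is optional; a usage sketch, with client standing in for any ObjectStoreLink implementation:

    byte[] objectId = new byte[20];  // arbitrary ID bytes
    try {
      client.put(objectId, new byte[]{1, 2, 3}, null);
    } catch (DuplicateObjectException e) {
      // an object with this ID has already been put into the store
    } catch (PlasmaOutOfMemoryException e) {
      // the store could not make room for the object
    }

PlasmaClientTest below exercises exactly the duplicate-put path.)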
+ */ + +package org.apache.arrow.plasma.exceptions; + +public class PlasmaOutOfMemoryException extends RuntimeException { + + public PlasmaOutOfMemoryException() { + super("The plasma store ran out of memory."); + } + + public PlasmaOutOfMemoryException(Throwable t) { + super("The plasma store ran out of memory.", t); + } +} diff --git a/java/plasma/src/test/java/org/apache/arrow/plasma/PlasmaClientTest.java b/java/plasma/src/test/java/org/apache/arrow/plasma/PlasmaClientTest.java index 70e277a61e478..3f326d30d834a 100644 --- a/java/plasma/src/test/java/org/apache/arrow/plasma/PlasmaClientTest.java +++ b/java/plasma/src/test/java/org/apache/arrow/plasma/PlasmaClientTest.java @@ -23,6 +23,9 @@ import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; +import org.apache.arrow.plasma.exceptions.DuplicateObjectException; +import org.junit.Assert; + public class PlasmaClientTest { private String storeSuffix = "/tmp/store"; @@ -142,8 +145,12 @@ public void doTest() { assert Arrays.equals(values.get(0), value1); assert Arrays.equals(values.get(1), value2); System.out.println("Plasma java client get multi-object test success."); - pLink.put(id1, value1, null); - System.out.println("Plasma java client put same object twice exception test success."); + try { + pLink.put(id1, value1, null); + Assert.fail("Fail to throw DuplicateObjectException when put an object into plasma store twice."); + } catch (DuplicateObjectException e) { + System.out.println("Plasma java client put same object twice exception test success."); + } byte[] id1Hash = pLink.hash(id1); assert id1Hash != null; System.out.println("Plasma java client hash test success."); diff --git a/java/plasma/src/test/resources/logback.xml b/java/plasma/src/test/resources/logback.xml new file mode 100644 index 0000000000000..4c54d18a210ff --- /dev/null +++ b/java/plasma/src/test/resources/logback.xml @@ -0,0 +1,28 @@ + + + + + + + + + %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + diff --git a/java/pom.xml b/java/pom.xml index 0df1178c1d62e..9093bfa46d7db 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -20,7 +20,7 @@ org.apache.arrow arrow-java-root - 0.12.0-SNAPSHOT + 0.13.0-SNAPSHOT pom Apache Arrow Java Root POM @@ -261,7 +261,7 @@ dd.MM.yyyy '@' HH:mm:ss z - true + false false true false @@ -334,6 +334,10 @@ true true + + + javax.annotation:javax.annotation-api:* + @@ -350,7 +354,7 @@ org.apache.rat apache-rat-plugin - 0.11 + 0.13 org.apache.maven.plugins @@ -530,6 +534,11 @@ slf4j-api ${dep.slf4j.version} + + javax.annotation + javax.annotation-api + 1.3.2 + diff --git a/java/tools/pom.xml b/java/tools/pom.xml index d01c95120a56a..c22f3beb3fa01 100644 --- a/java/tools/pom.xml +++ b/java/tools/pom.xml @@ -14,7 +14,7 @@ org.apache.arrow arrow-java-root - 0.12.0-SNAPSHOT + 0.13.0-SNAPSHOT arrow-tools Arrow Tools diff --git a/java/tools/src/main/java/org/apache/arrow/tools/EchoServer.java b/java/tools/src/main/java/org/apache/arrow/tools/EchoServer.java index 95ff71067a51d..6f68a9ef5e295 100644 --- a/java/tools/src/main/java/org/apache/arrow/tools/EchoServer.java +++ b/java/tools/src/main/java/org/apache/arrow/tools/EchoServer.java @@ -37,9 +37,9 @@ public class EchoServer { private boolean closed = false; public EchoServer(int port) throws IOException { - LOGGER.info("Starting echo server."); + LOGGER.debug("Starting echo server."); serverSocket = new ServerSocket(port); - LOGGER.info("Running echo server on port: " + port()); + LOGGER.debug("Running echo server on port: " + port()); } public static 
void main(String[] args) throws Exception { @@ -59,9 +59,9 @@ public int port() { public void run() throws IOException { try { while (!closed) { - LOGGER.info("Waiting to accept new client connection."); + LOGGER.debug("Waiting to accept new client connection."); Socket clientSocket = serverSocket.accept(); - LOGGER.info("Accepted new client connection."); + LOGGER.debug("Accepted new client connection."); try (ClientConnection client = new ClientConnection(clientSocket)) { try { client.run(); @@ -69,7 +69,7 @@ public void run() throws IOException { LOGGER.warn("Error handling client connection.", e); } } - LOGGER.info("Closed connection with client"); + LOGGER.debug("Closed connection with client"); } } catch (java.net.SocketException ex) { if (!closed) { @@ -77,7 +77,7 @@ public void run() throws IOException { } } finally { serverSocket.close(); - LOGGER.info("Server closed."); + LOGGER.debug("Server closed."); } } @@ -116,7 +116,7 @@ public void run() throws IOException { } writer.end(); Preconditions.checkState(reader.bytesRead() == writer.bytesWritten()); - LOGGER.info(String.format("Echoed %d records", echoed)); + LOGGER.debug(String.format("Echoed %d records", echoed)); } } } diff --git a/java/tools/src/test/resources/logback.xml b/java/tools/src/test/resources/logback.xml new file mode 100644 index 0000000000000..ff848da2a8be1 --- /dev/null +++ b/java/tools/src/test/resources/logback.xml @@ -0,0 +1,27 @@ + + + + + + + + + %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + diff --git a/java/vector/pom.xml b/java/vector/pom.xml index fc4eb8fd16746..3f3275242ec8a 100644 --- a/java/vector/pom.xml +++ b/java/vector/pom.xml @@ -14,7 +14,7 @@ org.apache.arrow arrow-java-root - 0.12.0-SNAPSHOT + 0.13.0-SNAPSHOT arrow-vector Arrow Vectors diff --git a/java/vector/src/main/java/org/apache/arrow/util/AutoCloseables.java b/java/vector/src/main/java/org/apache/arrow/util/AutoCloseables.java index d89478ecc709c..2f6ee9fb52ea6 100644 --- a/java/vector/src/main/java/org/apache/arrow/util/AutoCloseables.java +++ b/java/vector/src/main/java/org/apache/arrow/util/AutoCloseables.java @@ -172,10 +172,10 @@ public static RollbackCloseable rollbackable(AutoCloseable... closeables) { } /** - * close() an {@see java.lang.AutoCloseable} without throwing a (checked) - * {@see java.lang.Exception}. This wraps the close() call with a + * close() an {@link java.lang.AutoCloseable} without throwing a (checked) + * {@link java.lang.Exception}. This wraps the close() call with a * try-catch that will rethrow an Exception wrapped with a - * {@see java.lang.RuntimeException}, providing a way to call close() + * {@link java.lang.RuntimeException}, providing a way to call close() * without having to do the try-catch everywhere or propagate the Exception. 
* * @param autoCloseable the AutoCloseable to close; may be null diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java index bc0b77a0aeb0a..f3c2837cfa7e8 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java @@ -22,7 +22,6 @@ import java.util.Collections; import java.util.List; -import org.apache.arrow.memory.BaseAllocator; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.OutOfMemoryException; import org.apache.arrow.vector.ipc.message.ArrowFieldNode; @@ -43,8 +42,7 @@ public abstract class BaseFixedWidthVector extends BaseValueVector implements FixedWidthVector, FieldVector, VectorDefinitionSetter { private final int typeWidth; - protected int valueAllocationSizeInBytes; - protected int validityAllocationSizeInBytes; + protected int initialValueAllocation; protected final Field field; private int allocationMonitor; @@ -61,14 +59,7 @@ public BaseFixedWidthVector(final String name, final BufferAllocator allocator, allocationMonitor = 0; validityBuffer = allocator.getEmpty(); valueBuffer = allocator.getEmpty(); - if (typeWidth > 0) { - valueAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * typeWidth; - validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); - } else { - /* specialized handling for BitVector */ - valueAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); - validityAllocationSizeInBytes = valueAllocationSizeInBytes; - } + initialValueAllocation = INITIAL_VALUE_ALLOCATION; } @@ -159,12 +150,8 @@ public ArrowBuf getOffsetBuffer() { */ @Override public void setInitialCapacity(int valueCount) { - final long size = (long) valueCount * typeWidth; - if (size > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); - } - valueAllocationSizeInBytes = (int) size; - validityAllocationSizeInBytes = getValidityBufferSizeFromCount(valueCount); + computeAndCheckBufferSize(valueCount); + initialValueAllocation = valueCount; } /** @@ -267,18 +254,13 @@ public void allocateNew() { */ @Override public boolean allocateNewSafe() { - long curAllocationSizeValue = valueAllocationSizeInBytes; - long curAllocationSizeValidity = validityAllocationSizeInBytes; - - if (curAllocationSizeValue > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Requested amount of memory exceeds limit"); - } + computeAndCheckBufferSize(initialValueAllocation); /* we are doing a new allocation -- release the current buffers */ clear(); try { - allocateBytes(curAllocationSizeValue, curAllocationSizeValidity); + allocateBytes(initialValueAllocation); } catch (Exception e) { clear(); return false; @@ -295,28 +277,32 @@ public boolean allocateNewSafe() { * @throws org.apache.arrow.memory.OutOfMemoryException on error */ public void allocateNew(int valueCount) { - long valueBufferSize = valueCount * typeWidth; - long validityBufferSize = getValidityBufferSizeFromCount(valueCount); - if (typeWidth == 0) { - /* specialized handling for BitVector */ - valueBufferSize = validityBufferSize; - } - - if (valueBufferSize > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); - } + computeAndCheckBufferSize(valueCount); /* we are doing a new allocation -- release the current 
buffers */ clear(); try { - allocateBytes(valueBufferSize, validityBufferSize); + allocateBytes(valueCount); } catch (Exception e) { clear(); throw e; } } + /* + * Compute the buffer size required for 'valueCount', and check if it's within bounds. + */ + private long computeAndCheckBufferSize(int valueCount) { + final long size = computeCombinedBufferSize(valueCount, typeWidth); + if (size > MAX_ALLOCATION_SIZE) { + throw new OversizedAllocationException("Memory required for vector capacity " + + valueCount + + " is (" + size + "), which is more than max allowed (" + MAX_ALLOCATION_SIZE + ")"); + } + return size; + } + /** * Actual memory allocation is done by this function. All the calculations * and knowledge about what size to allocate is upto the callers of this @@ -326,14 +312,10 @@ public void allocateNew(int valueCount) { * within the bounds of max allocation allowed and any other error * conditions. */ - private void allocateBytes(final long valueBufferSize, final long validityBufferSize) { - /* allocate data buffer */ - int curSize = (int) valueBufferSize; - valueBuffer = allocator.buffer(curSize); - valueBuffer.readerIndex(0); - valueAllocationSizeInBytes = curSize; - /* allocate validity buffer */ - allocateValidityBuffer((int) validityBufferSize); + private void allocateBytes(int valueCount) { + DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(valueCount, typeWidth); + valueBuffer = buffers.getDataBuf(); + validityBuffer = buffers.getValidityBuf(); zeroVector(); } @@ -346,7 +328,6 @@ private void allocateBytes(final long valueBufferSize, final long validityBuffer private void allocateValidityBuffer(final int validityBufferSize) { validityBuffer = allocator.buffer(validityBufferSize); validityBuffer.readerIndex(0); - validityAllocationSizeInBytes = validityBufferSize; } /** @@ -422,43 +403,28 @@ public ArrowBuf[] getBuffers(boolean clear) { */ @Override public void reAlloc() { - valueBuffer = reallocBufferHelper(valueBuffer, true); - validityBuffer = reallocBufferHelper(validityBuffer, false); - } - - /** - * Helper method for reallocating a particular internal buffer - * Returns the new buffer. - */ - private ArrowBuf reallocBufferHelper(ArrowBuf buffer, final boolean dataBuffer) { - final int currentBufferCapacity = buffer.capacity(); - long baseSize = (dataBuffer ? 
valueAllocationSizeInBytes - : validityAllocationSizeInBytes); - - if (baseSize < (long) currentBufferCapacity) { - baseSize = (long) currentBufferCapacity; - } - - long newAllocationSize = baseSize * 2L; - newAllocationSize = BaseAllocator.nextPowerOfTwo(newAllocationSize); - assert newAllocationSize >= 1; - - if (newAllocationSize > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Unable to expand the buffer"); + int targetValueCount = getValueCapacity() * 2; + if (targetValueCount == 0) { + if (initialValueAllocation > 0) { + targetValueCount = initialValueAllocation * 2; + } else { + targetValueCount = INITIAL_VALUE_ALLOCATION * 2; + } } + computeAndCheckBufferSize(targetValueCount); - final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize); - newBuf.setBytes(0, buffer, 0, currentBufferCapacity); - newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); - buffer.release(1); - buffer = newBuf; - if (dataBuffer) { - valueAllocationSizeInBytes = (int) newAllocationSize; - } else { - validityAllocationSizeInBytes = (int) newAllocationSize; - } + DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(targetValueCount, typeWidth); + final ArrowBuf newValueBuffer = buffers.getDataBuf(); + newValueBuffer.setBytes(0, valueBuffer, 0, valueBuffer.capacity()); + newValueBuffer.setZero(valueBuffer.capacity(), newValueBuffer.capacity() - valueBuffer.capacity()); + valueBuffer.release(); + valueBuffer = newValueBuffer; - return buffer; + final ArrowBuf newValidityBuffer = buffers.getValidityBuf(); + newValidityBuffer.setBytes(0, validityBuffer, 0, validityBuffer.capacity()); + newValidityBuffer.setZero(validityBuffer.capacity(), newValidityBuffer.capacity() - validityBuffer.capacity()); + validityBuffer.release(); + validityBuffer = newValidityBuffer; } @Override @@ -511,9 +477,6 @@ public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers valueBuffer = dataBuffer.retain(allocator); valueCount = fieldNode.getLength(); - - valueAllocationSizeInBytes = valueBuffer.capacity(); - validityAllocationSizeInBytes = validityBuffer.capacity(); } /** diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java index 4cbf4be19dfeb..4e014bbd2aefe 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java @@ -20,6 +20,7 @@ import java.util.Collections; import java.util.Iterator; +import org.apache.arrow.memory.BaseAllocator; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.util.TransferPair; @@ -33,7 +34,14 @@ public abstract class BaseValueVector implements ValueVector { public static final String MAX_ALLOCATION_SIZE_PROPERTY = "arrow.vector.max_allocation_bytes"; public static final int MAX_ALLOCATION_SIZE = Integer.getInteger(MAX_ALLOCATION_SIZE_PROPERTY, Integer.MAX_VALUE); - public static final int INITIAL_VALUE_ALLOCATION = 4096; + /* + * For all fixed width vectors, the value and validity buffers are sliced from a single buffer. + * Similarly, for variable width vectors, the offsets and validity buffers are sliced from a + * single buffer. To ensure the single buffer is power-of-2 size, the initial value allocation + * should be less than power-of-2. 
For IntVectors, this comes to 3970*4 (15880) for the data + buffer and 504 bytes for the validity buffer, totaling 16384 (2^14). + */ + public static final int INITIAL_VALUE_ALLOCATION = 3970; protected final BufferAllocator allocator; protected final String name; @@ -98,5 +106,94 @@ protected ArrowBuf releaseBuffer(ArrowBuf buffer) { protected static int getValidityBufferSizeFromCount(final int valueCount) { return (int) Math.ceil(valueCount / 8.0); } + + /* round up to the next multiple of 8 */ + private static long roundUp8(long size) { + return ((size + 7) / 8) * 8; + } + + protected long computeCombinedBufferSize(int valueCount, int typeWidth) { + Preconditions.checkArgument(valueCount >= 0, "valueCount must be >= 0"); + Preconditions.checkArgument(typeWidth >= 0, "typeWidth must be >= 0"); + + // compute size of validity buffer. + long bufferSize = roundUp8(getValidityBufferSizeFromCount(valueCount)); + + // add the size of the value buffer. + if (typeWidth == 0) { + // for boolean type, value-buffer and validity-buffer are of same size. + bufferSize *= 2; + } else { + bufferSize += roundUp8(valueCount * typeWidth); + } + return BaseAllocator.nextPowerOfTwo(bufferSize); + } + + class DataAndValidityBuffers { + private ArrowBuf dataBuf; + private ArrowBuf validityBuf; + + DataAndValidityBuffers(ArrowBuf dataBuf, ArrowBuf validityBuf) { + this.dataBuf = dataBuf; + this.validityBuf = validityBuf; + } + + public ArrowBuf getDataBuf() { + return dataBuf; + } + + public ArrowBuf getValidityBuf() { + return validityBuf; + } + + } + + protected DataAndValidityBuffers allocFixedDataAndValidityBufs(int valueCount, int typeWidth) { + long bufferSize = computeCombinedBufferSize(valueCount, typeWidth); + assert bufferSize < MAX_ALLOCATION_SIZE; + + int validityBufferSize; + int dataBufferSize; + if (typeWidth == 0) { + validityBufferSize = dataBufferSize = (int) (bufferSize / 2); + } else { + // Due to rounding up to a power-of-2 allocation, the bufferSize could be greater than the + // requested size. Utilize the allocated buffer fully. + int actualCount = (int) ((bufferSize * 8.0) / (8 * typeWidth + 1)); + do { + validityBufferSize = (int) roundUp8(getValidityBufferSizeFromCount(actualCount)); + dataBufferSize = (int) roundUp8(actualCount * typeWidth); + if (validityBufferSize + dataBufferSize <= bufferSize) { + break; + } + --actualCount; + } while (true); + } + + + /* allocate combined buffer */ + ArrowBuf combinedBuffer = allocator.buffer((int) bufferSize); + + /* slice into requested lengths */ + ArrowBuf dataBuf = null; + ArrowBuf validityBuf = null; + int bufferOffset = 0; + for (int numBuffers = 0; numBuffers < 2; ++numBuffers) { + int len = (numBuffers == 0 ? 
dataBufferSize : validityBufferSize); + ArrowBuf buf = combinedBuffer.slice(bufferOffset, len); + buf.retain(); + buf.readerIndex(0); + buf.writerIndex(0); + + bufferOffset += len; + if (numBuffers == 0) { + dataBuf = buf; + } else { + validityBuf = buf; + } + } + combinedBuffer.release(); + return new DataAndValidityBuffers(dataBuf, validityBuf); + } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java index 390dfe955b6ce..ac148a25c7c29 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java @@ -38,10 +38,8 @@ public abstract class BaseVariableWidthVector extends BaseValueVector implements VariableWidthVector, FieldVector, VectorDefinitionSetter { private static final int DEFAULT_RECORD_BYTE_COUNT = 8; private static final int INITIAL_BYTE_COUNT = INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT; - - private int valueAllocationSizeInBytes; - private int validityAllocationSizeInBytes; - private int offsetAllocationSizeInBytes; + private int initialValueAllocation; + private int initialValueAllocationSizeInBytes; /* protected members */ public static final int OFFSET_WIDTH = 4; /* 4 byte unsigned int to track offsets */ @@ -57,9 +55,9 @@ public abstract class BaseVariableWidthVector extends BaseValueVector public BaseVariableWidthVector(final String name, final BufferAllocator allocator, FieldType fieldType) { super(name, allocator); - valueAllocationSizeInBytes = INITIAL_BYTE_COUNT; - validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); - offsetAllocationSizeInBytes = (INITIAL_VALUE_ALLOCATION) * OFFSET_WIDTH; + initialValueAllocationSizeInBytes = INITIAL_BYTE_COUNT; + // -1 because we require one extra slot for the offset array. + initialValueAllocation = INITIAL_VALUE_ALLOCATION - 1; field = new Field(name, fieldType, null); valueCount = 0; lastSet = -1; @@ -155,15 +153,10 @@ public long getDataBufferAddress() { @Override public void setInitialCapacity(int valueCount) { final long size = (long) valueCount * DEFAULT_RECORD_BYTE_COUNT; - if (size > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); - } - valueAllocationSizeInBytes = (int) size; - validityAllocationSizeInBytes = getValidityBufferSizeFromCount(valueCount); - /* to track the end offset of last data element in vector, we need - * an additional slot in offset buffer. - */ - offsetAllocationSizeInBytes = (valueCount + 1) * OFFSET_WIDTH; + checkDataBufferSize(size); + computeAndCheckOffsetsBufferSize(valueCount); + initialValueAllocationSizeInBytes = (int) size; + initialValueAllocation = valueCount; } /** @@ -175,17 +168,10 @@ public void setInitialCapacity(int valueCount) { @Override public void setInitialCapacity(int valueCount, double density) { long size = Math.max((long)(valueCount * density), 1L); - - if (size > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); - } - - valueAllocationSizeInBytes = (int) size; - validityAllocationSizeInBytes = getValidityBufferSizeFromCount(valueCount); - /* to track the end offset of last data element in vector, we need - * an additional slot in offset buffer. 
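(Review note: worked numbers behind the new 3970 default and the combined slicing; roundUp8 and computeCombinedBufferSize are the helpers added to BaseValueVector above:

    // Fixed-width, e.g. IntVector (typeWidth = 4), valueCount = 3970:
    //   validity: ceil(3970 / 8) = 497  -> roundUp8 -> 504 bytes
    //   data:     3970 * 4 = 15880 bytes (already a multiple of 8)
    //   combined: 504 + 15880 = 16384 = 2^14, already a power of two, so
    //   nextPowerOfTwo() wastes nothing for the default allocation.
    long fixedSize = computeCombinedBufferSize(3970, 4);        // 16384
    // Variable-width: the constructor keeps INITIAL_VALUE_ALLOCATION - 1 = 3969
    // usable slots because the offset buffer needs valueCount + 1 entries; the
    // offsets (3970 * OFFSET_WIDTH) plus validity pack into the same 16384 bytes.
    long varSize = computeCombinedBufferSize(3969 + 1, 4);      // 16384
)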
- */ - offsetAllocationSizeInBytes = (valueCount + 1) * OFFSET_WIDTH; + checkDataBufferSize(size); + computeAndCheckOffsetsBufferSize(valueCount); + initialValueAllocationSizeInBytes = (int) size; + initialValueAllocation = valueCount; } /** @@ -376,20 +362,14 @@ public void allocateNew() { */ @Override public boolean allocateNewSafe() { - long curAllocationSizeValue = valueAllocationSizeInBytes; - long curAllocationSizeValidity = validityAllocationSizeInBytes; - long curAllocationSizeOffset = offsetAllocationSizeInBytes; - - if (curAllocationSizeValue > MAX_ALLOCATION_SIZE || - curAllocationSizeOffset > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Requested amount of memory exceeds limit"); - } + checkDataBufferSize(initialValueAllocationSizeInBytes); + computeAndCheckOffsetsBufferSize(initialValueAllocation); /* we are doing a new allocation -- release the current buffers */ clear(); try { - allocateBytes(curAllocationSizeValue, curAllocationSizeValidity, curAllocationSizeOffset); + allocateBytes(initialValueAllocationSizeInBytes, initialValueAllocation); } catch (Exception e) { clear(); return false; @@ -409,35 +389,59 @@ public boolean allocateNewSafe() { @Override public void allocateNew(int totalBytes, int valueCount) { assert totalBytes >= 0; - final int offsetBufferSize = (valueCount + 1) * OFFSET_WIDTH; - final int validityBufferSize = getValidityBufferSizeFromCount(valueCount); - if (totalBytes > MAX_ALLOCATION_SIZE || - offsetBufferSize > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Requested amount of memory exceeds limit"); - } + checkDataBufferSize(totalBytes); + computeAndCheckOffsetsBufferSize(valueCount); /* we are doing a new allocation -- release the current buffers */ clear(); try { - allocateBytes(totalBytes, validityBufferSize, offsetBufferSize); + allocateBytes(totalBytes, valueCount); } catch (Exception e) { clear(); throw e; } } + /* Check if the data buffer size is within bounds. */ + private void checkDataBufferSize(long size) { + if (size > MAX_ALLOCATION_SIZE) { + throw new OversizedAllocationException("Memory required for vector " + + " is (" + size + "), which is more than max allowed (" + MAX_ALLOCATION_SIZE + ")"); + } + } + + /* + * Compute the buffer size required for 'valueCount' offsets and validity, and check if it's + * within bounds. + */ + private long computeAndCheckOffsetsBufferSize(int valueCount) { + /* to track the end offset of last data element in vector, we need + * an additional slot in offset buffer. 
+ */ + final long size = computeCombinedBufferSize(valueCount + 1, OFFSET_WIDTH); + if (size > MAX_ALLOCATION_SIZE) { + throw new OversizedAllocationException("Memory required for vector capacity " + + valueCount + + " is (" + size + "), which is more than max allowed (" + MAX_ALLOCATION_SIZE + ")"); + } + return size; + } + /* allocate the inner buffers */ - private void allocateBytes(final long valueBufferSize, final long validityBufferSize, - final long offsetBufferSize) { + private void allocateBytes(final int valueBufferSize, final int valueCount) { /* allocate data buffer */ - int curSize = (int) valueBufferSize; + int curSize = valueBufferSize; valueBuffer = allocator.buffer(curSize); valueBuffer.readerIndex(0); - valueAllocationSizeInBytes = curSize; - allocateValidityBuffer(validityBufferSize); - allocateOffsetBuffer(offsetBufferSize); + + /* allocate offset buffer and validity buffer */ + DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(valueCount + 1, OFFSET_WIDTH); + offsetBuffer = buffers.getDataBuf(); + validityBuffer = buffers.getValidityBuf(); + initOffsetBuffer(); + initValidityBuffer(); } /* allocate offset buffer */ @@ -445,7 +449,6 @@ private void allocateOffsetBuffer(final long size) { final int curSize = (int) size; offsetBuffer = allocator.buffer(curSize); offsetBuffer.readerIndex(0); - offsetAllocationSizeInBytes = curSize; initOffsetBuffer(); } @@ -454,7 +457,6 @@ private void allocateValidityBuffer(final long size) { final int curSize = (int) size; validityBuffer = allocator.buffer(curSize); validityBuffer.readerIndex(0); - validityAllocationSizeInBytes = curSize; initValidityBuffer(); } @@ -476,7 +478,7 @@ public void reAlloc() { * @throws OutOfMemoryException if the internal memory allocation fails */ public void reallocDataBuffer() { - long baseSize = valueAllocationSizeInBytes; + long baseSize = initialValueAllocationSizeInBytes; final int currentBufferCapacity = valueBuffer.capacity(); if (baseSize < (long) currentBufferCapacity) { @@ -487,15 +489,12 @@ public void reallocDataBuffer() { newAllocationSize = BaseAllocator.nextPowerOfTwo(newAllocationSize); assert newAllocationSize >= 1; - if (newAllocationSize > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Unable to expand the buffer"); - } + checkDataBufferSize(newAllocationSize); final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize); newBuf.setBytes(0, valueBuffer, 0, currentBufferCapacity); valueBuffer.release(); valueBuffer = newBuf; - valueAllocationSizeInBytes = (int) newAllocationSize; } /** @@ -522,40 +521,28 @@ public void reallocDataBuffer() { * @throws OutOfMemoryException if the internal memory allocation fails */ public void reallocValidityAndOffsetBuffers() { - offsetBuffer = reallocBufferHelper(offsetBuffer, true); - validityBuffer = reallocBufferHelper(validityBuffer, false); - } - - /* helper method to realloc a particular buffer. returns the allocated buffer */ - private ArrowBuf reallocBufferHelper(ArrowBuf buffer, final boolean offsetBuffer) { - final int currentBufferCapacity = buffer.capacity(); - long baseSize = (offsetBuffer ? 
offsetAllocationSizeInBytes - : validityAllocationSizeInBytes); - - if (baseSize < (long) currentBufferCapacity) { - baseSize = (long) currentBufferCapacity; - } - - long newAllocationSize = baseSize * 2L; - newAllocationSize = BaseAllocator.nextPowerOfTwo(newAllocationSize); - assert newAllocationSize >= 1; - - if (newAllocationSize > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Unable to expand the buffer"); + int targetOffsetCount = (offsetBuffer.capacity() / OFFSET_WIDTH) * 2; + if (targetOffsetCount == 0) { + if (initialValueAllocation > 0) { + targetOffsetCount = 2 * (initialValueAllocation + 1); + } else { + targetOffsetCount = 2 * (INITIAL_VALUE_ALLOCATION + 1); + } } + computeAndCheckOffsetsBufferSize(targetOffsetCount); - final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize); - newBuf.setBytes(0, buffer, 0, currentBufferCapacity); - newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); - buffer.release(1); - buffer = newBuf; - if (offsetBuffer) { - offsetAllocationSizeInBytes = (int) newAllocationSize; - } else { - validityAllocationSizeInBytes = (int) newAllocationSize; - } + DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(targetOffsetCount, OFFSET_WIDTH); + final ArrowBuf newOffsetBuffer = buffers.getDataBuf(); + newOffsetBuffer.setBytes(0, offsetBuffer, 0, offsetBuffer.capacity()); + newOffsetBuffer.setZero(offsetBuffer.capacity(), newOffsetBuffer.capacity() - offsetBuffer.capacity()); + offsetBuffer.release(); + offsetBuffer = newOffsetBuffer; - return buffer; + final ArrowBuf newValidityBuffer = buffers.getValidityBuf(); + newValidityBuffer.setBytes(0, validityBuffer, 0, validityBuffer.capacity()); + newValidityBuffer.setZero(validityBuffer.capacity(), newValidityBuffer.capacity() - validityBuffer.capacity()); + validityBuffer.release(); + validityBuffer = newValidityBuffer; } /** @@ -919,7 +906,7 @@ public long getStartEnd(int index) { @Override public void setIndexDefined(int index) { while (index >= getValidityBufferValueCapacity()) { - validityBuffer = reallocBufferHelper(validityBuffer, false); + reallocValidityAndOffsetBuffers(); } BitVectorHelper.setValidityBitToOne(validityBuffer, index); } @@ -1072,7 +1059,7 @@ public void setSafe(int index, ByteBuffer value, int start, int length) { */ public void setNull(int index) { while (index >= getValidityBufferValueCapacity()) { - validityBuffer = reallocBufferHelper(validityBuffer, false); + reallocValidityAndOffsetBuffers(); } BitVectorHelper.setValidityBit(validityBuffer, index, 0); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java index 7aac28cbf1fc4..c6c964233419d 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java @@ -91,11 +91,10 @@ public MinorType getMinorType() { @Override public void setInitialCapacity(int valueCount) { final int size = getValidityBufferSizeFromCount(valueCount); - if (size > MAX_ALLOCATION_SIZE) { + if (size * 2 > MAX_ALLOCATION_SIZE) { throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); } - valueAllocationSizeInBytes = size; - validityAllocationSizeInBytes = size; + initialValueAllocation = valueCount; } /** diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java b/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java 
index 48bc8936d9fbe..a407166c4f6d0 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java @@ -40,19 +40,18 @@ public void testTransferFixedWidth() { IntVector v1 = new IntVector("v1", childAllocator1); v1.allocateNew(); v1.setValueCount(4095); + long totalAllocatedMemory = childAllocator1.getAllocatedMemory(); IntVector v2 = new IntVector("v2", childAllocator2); v1.makeTransferPair(v2).transfer(); assertEquals(0, childAllocator1.getAllocatedMemory()); - int expectedBitVector = 512; - int expectedValueVector = 4096 * 4; - assertEquals(expectedBitVector + expectedValueVector, childAllocator2.getAllocatedMemory()); + assertEquals(totalAllocatedMemory, childAllocator2.getAllocatedMemory()); } @Test - public void testTransferVariableidth() { + public void testTransferVariableWidth() { BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); BufferAllocator childAllocator1 = allocator.newChildAllocator("child1", 100000, 100000); BufferAllocator childAllocator2 = allocator.newChildAllocator("child2", 100000, 100000); @@ -63,15 +62,12 @@ public void testTransferVariableidth() { v1.setValueCount(4001); VarCharVector v2 = new VarCharVector("v2", childAllocator2); + long memoryBeforeTransfer = childAllocator1.getAllocatedMemory(); v1.makeTransferPair(v2).transfer(); assertEquals(0, childAllocator1.getAllocatedMemory()); - int expectedValueVector = 4096 * 8; - int expectedOffsetVector = 4096 * 4; - int expectedBitVector = 512; - int expected = expectedBitVector + expectedOffsetVector + expectedValueVector; - assertEquals(expected, childAllocator2.getAllocatedMemory()); + assertEquals(memoryBeforeTransfer, childAllocator2.getAllocatedMemory()); } private static class Pointer { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java b/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java index f7d3ddb397315..b10db95b6cf48 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java @@ -69,14 +69,16 @@ public void terminate() throws Exception { @Test /* NullableVarChar */ public void testCopyFromWithNulls() { - try (final VarCharVector vector = newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator); - final VarCharVector vector2 = - newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) { + try (final VarCharVector vector = + newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator); + final VarCharVector vector2 = + newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) { vector.allocateNew(); - int capacity = vector.getValueCapacity(); - assertEquals(4095, capacity); + assertTrue(vector.getValueCapacity() >= 1); + assertEquals(0, vector.getValueCount()); + int initialCapacity = vector.getValueCapacity(); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < initialCapacity; i++) { if (i % 3 == 0) { continue; } @@ -85,43 +87,53 @@ public void testCopyFromWithNulls() { } /* NO reAlloc() should have happened in setSafe() */ - capacity = vector.getValueCapacity(); - assertEquals(4095, capacity); + int capacity = vector.getValueCapacity(); + assertEquals(initialCapacity, capacity); - vector.setValueCount(4095); + vector.setValueCount(initialCapacity); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < initialCapacity; i++) { if (i % 3 
== 0) { assertNull(vector.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, Integer.toString(i), vector.getObject(i).toString()); + assertEquals( + "unexpected value at index: " + i, + Integer.toString(i), + vector.getObject(i).toString()); } } + vector2.setInitialCapacity(initialCapacity); vector2.allocateNew(); capacity = vector2.getValueCapacity(); - assertEquals(4095, capacity); + assertEquals(initialCapacity, capacity); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < initialCapacity; i++) { vector2.copyFromSafe(i, i, vector); if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, Integer.toString(i), vector2.getObject(i).toString()); + assertEquals( + "unexpected value at index: " + i, + Integer.toString(i), + vector2.getObject(i).toString()); } } /* NO reAlloc() should have happened in copyFrom */ capacity = vector2.getValueCapacity(); - assertEquals(4095, capacity); + assertEquals(initialCapacity, capacity); - vector2.setValueCount(4095); + vector2.setValueCount(initialCapacity); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < initialCapacity; i++) { if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, Integer.toString(i), vector2.getObject(i).toString()); + assertEquals( + "unexpected value at index: " + i, + Integer.toString(i), + vector2.getObject(i).toString()); } } } @@ -129,14 +141,16 @@ public void testCopyFromWithNulls() { @Test /* NullableVarChar */ public void testCopyFromWithNulls1() { - try (final VarCharVector vector = newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator); - final VarCharVector vector2 = - newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) { + try (final VarCharVector vector = + newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator); + final VarCharVector vector2 = + newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) { vector.allocateNew(); - int capacity = vector.getValueCapacity(); - assertEquals(4095, capacity); + assertTrue(vector.getValueCapacity() >= 1); + assertEquals(0, vector.getValueCount()); + int initialCapacity = vector.getValueCapacity(); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < initialCapacity; i++) { if (i % 3 == 0) { continue; } @@ -145,47 +159,57 @@ public void testCopyFromWithNulls1() { } /* NO reAlloc() should have happened in setSafe() */ - capacity = vector.getValueCapacity(); - assertEquals(4095, capacity); + int capacity = vector.getValueCapacity(); + assertEquals(initialCapacity, capacity); - vector.setValueCount(4095); + vector.setValueCount(initialCapacity); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < initialCapacity; i++) { if (i % 3 == 0) { assertNull(vector.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, Integer.toString(i), vector.getObject(i).toString()); + assertEquals( + "unexpected value at index: " + i, + Integer.toString(i), + vector.getObject(i).toString()); } } /* set lesser initial capacity than actually needed * to trigger reallocs in copyFromSafe() */ - vector2.allocateNew(1024 * 10, 1024); + vector2.allocateNew((initialCapacity / 4) * 10, initialCapacity / 4); capacity = vector2.getValueCapacity(); - assertEquals(1024, capacity); + assertTrue(capacity >= initialCapacity / 4); + assertTrue(capacity < initialCapacity / 2); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < initialCapacity; 
i++) { vector2.copyFromSafe(i, i, vector); if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, Integer.toString(i), vector2.getObject(i).toString()); + assertEquals( + "unexpected value at index: " + i, + Integer.toString(i), + vector2.getObject(i).toString()); } } /* 2 reAllocs should have happened in copyFromSafe() */ capacity = vector2.getValueCapacity(); - assertEquals(4096, capacity); + assertTrue(capacity >= initialCapacity); - vector2.setValueCount(4095); + vector2.setValueCount(initialCapacity); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < initialCapacity; i++) { if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, Integer.toString(i), vector2.getObject(i).toString()); + assertEquals( + "unexpected value at index: " + i, + Integer.toString(i), + vector2.getObject(i).toString()); } } } @@ -194,28 +218,29 @@ public void testCopyFromWithNulls1() { @Test /* IntVector */ public void testCopyFromWithNulls2() { try (final IntVector vector1 = new IntVector(EMPTY_SCHEMA_PATH, allocator); - final IntVector vector2 = new IntVector(EMPTY_SCHEMA_PATH, allocator)) { + final IntVector vector2 = new IntVector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertEquals(4096, vector1.getValueCapacity()); + assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); assertEquals(0, vector1.getValueCount()); + int initialCapacity = vector1.getValueCapacity(); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { continue; } vector1.setSafe(i, 1000 + i); } - vector1.setValueCount(4096); + vector1.setValueCount(initialCapacity); /* No realloc should have happened in setSafe or * setValueCount */ - assertEquals(4096, vector1.getValueCapacity()); - assertEquals(4096, vector1.getValueCount()); + assertEquals(initialCapacity, vector1.getValueCapacity()); + assertEquals(initialCapacity, vector1.getValueCount()); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { @@ -226,23 +251,24 @@ public void testCopyFromWithNulls2() { /* set lesser initial capacity than actually needed * to trigger reallocs in copyFromSafe() */ - vector2.allocateNew(1024); - assertEquals(1024, vector2.getValueCapacity()); + vector2.allocateNew(initialCapacity / 4); + assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); + assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { vector2.copyFromSafe(i, i, vector1); } /* 2 realloc should have happened in copyFromSafe() */ - assertEquals(4096, vector2.getValueCapacity()); - vector2.setValueCount(8192); + assertTrue(vector2.getValueCapacity() >= initialCapacity); + vector2.setValueCount(initialCapacity * 2); /* setValueCount() should have done another realloc */ - assertEquals(8192, vector2.getValueCount()); - assertEquals(8192, vector2.getValueCapacity()); + assertEquals(initialCapacity * 2, vector2.getValueCount()); + assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); /* check vector data after copy and realloc */ - for (int i = 0; i < 8192; i++) { - if (((i & 1) == 0) || (i >= 4096)) { + for (int i = 0; i < initialCapacity * 2; i++) { + if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { assertEquals("unexpected value at index: " + i, 1000 + i, 
vector2.get(i)); @@ -254,60 +280,60 @@ public void testCopyFromWithNulls2() { @Test /* BigIntVector */ public void testCopyFromWithNulls3() { try (final BigIntVector vector1 = new BigIntVector(EMPTY_SCHEMA_PATH, allocator); - final BigIntVector vector2 = new BigIntVector(EMPTY_SCHEMA_PATH, allocator)) { + final BigIntVector vector2 = new BigIntVector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertEquals(4096, vector1.getValueCapacity()); + assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); assertEquals(0, vector1.getValueCount()); + int initialCapacity = vector1.getValueCapacity(); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { continue; } - vector1.setSafe(i, 10000000000L + (long)i); + vector1.setSafe(i, 10000000000L + (long) i); } - vector1.setValueCount(4096); + vector1.setValueCount(initialCapacity); /* No realloc should have happened in setSafe or * setValueCount */ - assertEquals(4096, vector1.getValueCapacity()); - assertEquals(4096, vector1.getValueCount()); + assertEquals(initialCapacity, vector1.getValueCapacity()); + assertEquals(initialCapacity, vector1.getValueCount()); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, - 10000000000L + (long)i, vector1.get(i)); + assertEquals("unexpected value at index: " + i, 10000000000L + (long) i, vector1.get(i)); } } /* set lesser initial capacity than actually needed * to trigger reallocs in copyFromSafe() */ - vector2.allocateNew(1024); - assertEquals(1024, vector2.getValueCapacity()); + vector2.allocateNew(initialCapacity / 4); + assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); + assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { vector2.copyFromSafe(i, i, vector1); } /* 2 realloc should have happened in copyFromSafe() */ - assertEquals(4096, vector2.getValueCapacity()); - vector2.setValueCount(8192); + assertTrue(vector2.getValueCapacity() >= initialCapacity); + vector2.setValueCount(initialCapacity * 2); /* setValueCount() should have done another realloc */ - assertEquals(8192, vector2.getValueCount()); - assertEquals(8192, vector2.getValueCapacity()); + assertEquals(initialCapacity * 2, vector2.getValueCount()); + assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); /* check vector data after copy and realloc */ - for (int i = 0; i < 8192; i++) { - if (((i & 1) == 0) || (i >= 4096)) { + for (int i = 0; i < initialCapacity * 2; i++) { + if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, - 10000000000L + (long)i, vector2.get(i)); + assertEquals("unexpected value at index: " + i, 10000000000L + (long) i, vector2.get(i)); } } } @@ -316,8 +342,9 @@ public void testCopyFromWithNulls3() { @Test /* BitVector */ public void testCopyFromWithNulls4() { try (final BitVector vector1 = new BitVector(EMPTY_SCHEMA_PATH, allocator); - final BitVector vector2 = new BitVector(EMPTY_SCHEMA_PATH, allocator)) { + final BitVector vector2 = new BitVector(EMPTY_SCHEMA_PATH, allocator)) { + vector1.setInitialCapacity(4096); vector1.allocateNew(); assertEquals(4096, vector1.getValueCapacity()); assertEquals(0, vector1.getValueCount()); @@ -394,60 +421,60 @@ public void testCopyFromWithNulls4() { @Test /* Float4Vector */ 
public void testCopyFromWithNulls5() { try (final Float4Vector vector1 = new Float4Vector(EMPTY_SCHEMA_PATH, allocator); - final Float4Vector vector2 = new Float4Vector(EMPTY_SCHEMA_PATH, allocator)) { + final Float4Vector vector2 = new Float4Vector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertEquals(4096, vector1.getValueCapacity()); + assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); assertEquals(0, vector1.getValueCount()); + int initialCapacity = vector1.getValueCapacity(); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { continue; } - vector1.setSafe(i, 100.25f + (float)i); + vector1.setSafe(i, 100.25f + (float) i); } - vector1.setValueCount(4096); + vector1.setValueCount(initialCapacity); /* No realloc should have happened in setSafe or * setValueCount */ - assertEquals(4096, vector1.getValueCapacity()); - assertEquals(4096, vector1.getValueCount()); + assertEquals(initialCapacity, vector1.getValueCapacity()); + assertEquals(initialCapacity, vector1.getValueCount()); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, - 100.25f + (float)i, vector1.get(i), 0); + assertEquals("unexpected value at index: " + i, 100.25f + (float) i, vector1.get(i), 0); } } /* set lesser initial capacity than actually needed * to trigger reallocs in copyFromSafe() */ - vector2.allocateNew(1024); - assertEquals(1024, vector2.getValueCapacity()); + vector2.allocateNew(initialCapacity / 4); + assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); + assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { vector2.copyFromSafe(i, i, vector1); } /* 2 realloc should have happened in copyFromSafe() */ - assertEquals(4096, vector2.getValueCapacity()); - vector2.setValueCount(8192); + assertTrue(vector2.getValueCapacity() >= initialCapacity); + vector2.setValueCount(initialCapacity * 2); /* setValueCount() should have done another realloc */ - assertEquals(8192, vector2.getValueCount()); - assertEquals(8192, vector2.getValueCapacity()); + assertEquals(initialCapacity * 2, vector2.getValueCount()); + assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); /* check vector data after copy and realloc */ - for (int i = 0; i < 8192; i++) { - if (((i & 1) == 0) || (i >= 4096)) { + for (int i = 0; i < initialCapacity * 2; i++) { + if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, - 100.25f + i * 1.0f, vector2.get(i), 0); + assertEquals("unexpected value at index: " + i, 100.25f + i * 1.0f, vector2.get(i), 0); } } } @@ -456,60 +483,62 @@ public void testCopyFromWithNulls5() { @Test /* Float8Vector */ public void testCopyFromWithNulls6() { try (final Float8Vector vector1 = new Float8Vector(EMPTY_SCHEMA_PATH, allocator); - final Float8Vector vector2 = new Float8Vector(EMPTY_SCHEMA_PATH, allocator)) { + final Float8Vector vector2 = new Float8Vector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertEquals(4096, vector1.getValueCapacity()); + assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); assertEquals(0, vector1.getValueCount()); + int initialCapacity = vector1.getValueCapacity(); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i 
& 1) == 0) { continue; } vector1.setSafe(i, 123456.7865 + (double) i); } - vector1.setValueCount(4096); + vector1.setValueCount(initialCapacity); /* No realloc should have happened in setSafe or * setValueCount */ - assertEquals(4096, vector1.getValueCapacity()); - assertEquals(4096, vector1.getValueCount()); + assertEquals(initialCapacity, vector1.getValueCapacity()); + assertEquals(initialCapacity, vector1.getValueCount()); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, - 123456.7865 + (double) i, vector1.get(i), 0); + assertEquals( + "unexpected value at index: " + i, 123456.7865 + (double) i, vector1.get(i), 0); } } /* set lesser initial capacity than actually needed * to trigger reallocs in copyFromSafe() */ - vector2.allocateNew(1024); - assertEquals(1024, vector2.getValueCapacity()); + vector2.allocateNew(initialCapacity / 4); + assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); + assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { vector2.copyFromSafe(i, i, vector1); } /* 2 realloc should have happened in copyFromSafe() */ - assertEquals(4096, vector2.getValueCapacity()); - vector2.setValueCount(8192); + assertTrue(vector2.getValueCapacity() >= initialCapacity); + vector2.setValueCount(initialCapacity * 2); /* setValueCount() should have done another realloc */ - assertEquals(8192, vector2.getValueCount()); - assertEquals(8192, vector2.getValueCapacity()); + assertEquals(initialCapacity * 2, vector2.getValueCount()); + assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); /* check vector data after copy and realloc */ - for (int i = 0; i < 8192; i++) { - if (((i & 1) == 0) || (i >= 4096)) { + for (int i = 0; i < initialCapacity * 2; i++) { + if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, - 123456.7865 + (double) i, vector2.get(i), 0); + assertEquals( + "unexpected value at index: " + i, 123456.7865 + (double) i, vector2.get(i), 0); } } } @@ -518,30 +547,31 @@ public void testCopyFromWithNulls6() { @Test /* IntervalDayVector */ public void testCopyFromWithNulls7() { try (final IntervalDayVector vector1 = new IntervalDayVector(EMPTY_SCHEMA_PATH, allocator); - final IntervalDayVector vector2 = new IntervalDayVector(EMPTY_SCHEMA_PATH, allocator)) { + final IntervalDayVector vector2 = new IntervalDayVector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertEquals(4096, vector1.getValueCapacity()); + assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); assertEquals(0, vector1.getValueCount()); + int initialCapacity = vector1.getValueCapacity(); final int days = 10; final int milliseconds = 10000; - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { continue; } vector1.setSafe(i, days + i, milliseconds + i); } - vector1.setValueCount(4096); + vector1.setValueCount(initialCapacity); /* No realloc should have happened in setSafe or * setValueCount */ - assertEquals(4096, vector1.getValueCapacity()); - assertEquals(4096, vector1.getValueCount()); + assertEquals(initialCapacity, vector1.getValueCapacity()); + assertEquals(initialCapacity, vector1.getValueCount()); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { 
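+ // only odd indices were populated above, so every even index must still read back as null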
assertNull(vector1.getObject(i)); } else { @@ -554,23 +584,24 @@ public void testCopyFromWithNulls7() { /* set lesser initial capacity than actually needed * to trigger reallocs in copyFromSafe() */ - vector2.allocateNew(1024); - assertEquals(1024, vector2.getValueCapacity()); + vector2.allocateNew(initialCapacity / 4); + assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); + assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { vector2.copyFromSafe(i, i, vector1); } /* 2 realloc should have happened in copyFromSafe() */ - assertEquals(4096, vector2.getValueCapacity()); - vector2.setValueCount(8192); + assertTrue(vector2.getValueCapacity() >= initialCapacity); + vector2.setValueCount(initialCapacity * 2); /* setValueCount() should have done another realloc */ - assertEquals(8192, vector2.getValueCount()); - assertEquals(8192, vector2.getValueCapacity()); + assertEquals(initialCapacity * 2, vector2.getValueCount()); + assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); /* check vector data after copy and realloc */ - for (int i = 0; i < 8192; i++) { - if (((i & 1) == 0) || (i >= 4096)) { + for (int i = 0; i < initialCapacity * 2; i++) { + if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { final Period p = vector2.getObject(i); @@ -584,15 +615,16 @@ public void testCopyFromWithNulls7() { @Test /* IntervalYearVector */ public void testCopyFromWithNulls8() { try (final IntervalYearVector vector1 = new IntervalYearVector(EMPTY_SCHEMA_PATH, allocator); - final IntervalYearVector vector2 = new IntervalYearVector(EMPTY_SCHEMA_PATH, allocator)) { + final IntervalYearVector vector2 = new IntervalYearVector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertEquals(4096, vector1.getValueCapacity()); + assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); assertEquals(0, vector1.getValueCount()); + int initialCapacity = vector1.getValueCapacity(); final int interval = 30; /* 2 years 6 months */ - final Period[] periods = new Period[4096]; - for (int i = 0; i < 4096; i++) { + final Period[] periods = new Period[initialCapacity]; + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { continue; } @@ -600,18 +632,19 @@ public void testCopyFromWithNulls8() { final Period p = new Period(); final int years = (interval + i) / org.apache.arrow.vector.util.DateUtility.yearsToMonths; final int months = (interval + i) % org.apache.arrow.vector.util.DateUtility.yearsToMonths; - periods[i] = p.plusYears(years).plusMonths(months);; + periods[i] = p.plusYears(years).plusMonths(months); } - vector1.setValueCount(4096); + vector1.setValueCount(initialCapacity); /* No realloc should have happened in setSafe or * setValueCount */ - assertEquals(4096, vector1.getValueCapacity()); - assertEquals(4096, vector1.getValueCount()); + assertEquals(initialCapacity, vector1.getValueCapacity()); + assertEquals(initialCapacity, vector1.getValueCount()); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { @@ -624,23 +657,24 @@ public void testCopyFromWithNulls8() { /* set lesser initial capacity than actually needed * to trigger reallocs in copyFromSafe() */ - vector2.allocateNew(1024); - assertEquals(1024, vector2.getValueCapacity()); + vector2.allocateNew(initialCapacity / 4); + assertTrue(vector2.getValueCapacity() >= initialCapacity / 4);
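+ // the allocator rounds the requested size up, at most doubling it, so the capacity should also stay below initialCapacity / 2 + 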
assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { vector2.copyFromSafe(i, i, vector1); } /* 2 realloc should have happened in copyFromSafe() */ - assertEquals(4096, vector2.getValueCapacity()); - vector2.setValueCount(8192); + assertTrue(vector2.getValueCapacity() >= initialCapacity); + vector2.setValueCount(initialCapacity * 2); /* setValueCount() should have done another realloc */ - assertEquals(8192, vector2.getValueCount()); - assertEquals(8192, vector2.getValueCapacity()); + assertEquals(initialCapacity * 2, vector2.getValueCount()); + assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); /* check vector data after copy and realloc */ - for (int i = 0; i < 8192; i++) { - if (((i & 1) == 0) || (i >= 4096)) { + for (int i = 0; i < initialCapacity * 2; i++) { + if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { final Period p = vector2.getObject(i); @@ -653,61 +687,61 @@ public void testCopyFromWithNulls8() { @Test /* SmallIntVector */ public void testCopyFromWithNulls9() { try (final SmallIntVector vector1 = new SmallIntVector(EMPTY_SCHEMA_PATH, allocator); - final SmallIntVector vector2 = new SmallIntVector(EMPTY_SCHEMA_PATH, allocator)) { + final SmallIntVector vector2 = new SmallIntVector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertEquals(4096, vector1.getValueCapacity()); + assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); assertEquals(0, vector1.getValueCount()); + int initialCapacity = vector1.getValueCapacity(); final short val = 1000; - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { continue; } - vector1.setSafe(i, val + (short)i); + vector1.setSafe(i, val + (short) i); } - vector1.setValueCount(4096); + vector1.setValueCount(initialCapacity); /* No realloc should have happened in setSafe or * setValueCount */ - assertEquals(4096, vector1.getValueCapacity()); - assertEquals(4096, vector1.getValueCount()); + assertEquals(initialCapacity, vector1.getValueCapacity()); + assertEquals(initialCapacity, vector1.getValueCount()); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, - val + (short)i, vector1.get(i)); + assertEquals("unexpected value at index: " + i, val + (short) i, vector1.get(i)); } } /* set lesser initial capacity than actually needed * to trigger reallocs in copyFromSafe() */ - vector2.allocateNew(1024); - assertEquals(1024, vector2.getValueCapacity()); + vector2.allocateNew(initialCapacity / 4); + assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); + assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { vector2.copyFromSafe(i, i, vector1); } /* 2 realloc should have happened in copyFromSafe() */ - assertEquals(4096, vector2.getValueCapacity()); - vector2.setValueCount(8192); + assertTrue(vector2.getValueCapacity() >= initialCapacity); + vector2.setValueCount(initialCapacity * 2); /* setValueCount() should have done another realloc */ - assertEquals(8192, vector2.getValueCount()); - assertEquals(8192, vector2.getValueCapacity()); + assertEquals(initialCapacity * 2, vector2.getValueCount()); + assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); /* check vector data after copy and 
realloc */ - for (int i = 0; i < 8192; i++) { - if (((i & 1) == 0) || (i >= 4096)) { + for (int i = 0; i < initialCapacity * 2; i++) { + if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, - val + (short)i, vector2.get(i)); + assertEquals("unexpected value at index: " + i, val + (short) i, vector2.get(i)); } } } @@ -716,61 +750,61 @@ public void testCopyFromWithNulls9() { @Test /* TimeMicroVector */ public void testCopyFromWithNulls10() { try (final TimeMicroVector vector1 = new TimeMicroVector(EMPTY_SCHEMA_PATH, allocator); - final TimeMicroVector vector2 = new TimeMicroVector(EMPTY_SCHEMA_PATH, allocator)) { + final TimeMicroVector vector2 = new TimeMicroVector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertEquals(4096, vector1.getValueCapacity()); + assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); assertEquals(0, vector1.getValueCount()); + int initialCapacity = vector1.getValueCapacity(); final long val = 100485765432L; - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { continue; } - vector1.setSafe(i, val + (long)i); + vector1.setSafe(i, val + (long) i); } - vector1.setValueCount(4096); + vector1.setValueCount(initialCapacity); /* No realloc should have happened in setSafe or * setValueCount */ - assertEquals(4096, vector1.getValueCapacity()); - assertEquals(4096, vector1.getValueCount()); + assertEquals(initialCapacity, vector1.getValueCapacity()); + assertEquals(initialCapacity, vector1.getValueCount()); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, - val + (long)i, vector1.get(i)); + assertEquals("unexpected value at index: " + i, val + (long) i, vector1.get(i)); } } /* set lesser initial capacity than actually needed * to trigger reallocs in copyFromSafe() */ - vector2.allocateNew(1024); - assertEquals(1024, vector2.getValueCapacity()); + vector2.allocateNew(initialCapacity / 4); + assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); + assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { vector2.copyFromSafe(i, i, vector1); } /* 2 realloc should have happened in copyFromSafe() */ - assertEquals(4096, vector2.getValueCapacity()); - vector2.setValueCount(8192); + assertTrue(vector2.getValueCapacity() >= initialCapacity); + vector2.setValueCount(initialCapacity * 2); /* setValueCount() should have done another realloc */ - assertEquals(8192, vector2.getValueCount()); - assertEquals(8192, vector2.getValueCapacity()); + assertEquals(initialCapacity * 2, vector2.getValueCount()); + assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); /* check vector data after copy and realloc */ - for (int i = 0; i < 8192; i++) { - if (((i & 1) == 0) || (i >= 4096)) { + for (int i = 0; i < initialCapacity * 2; i++) { + if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, - val + (long) i, vector2.get(i)); + assertEquals("unexpected value at index: " + i, val + (long) i, vector2.get(i)); } } } @@ -779,61 +813,61 @@ public void testCopyFromWithNulls10() { @Test /* TimeMilliVector */ public void testCopyFromWithNulls11() { try (final TimeMilliVector vector1 = new 
TimeMilliVector(EMPTY_SCHEMA_PATH, allocator); - final TimeMilliVector vector2 = new TimeMilliVector(EMPTY_SCHEMA_PATH, allocator)) { + final TimeMilliVector vector2 = new TimeMilliVector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertEquals(4096, vector1.getValueCapacity()); + assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); assertEquals(0, vector1.getValueCount()); + int initialCapacity = vector1.getValueCapacity(); final int val = 1000; - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { continue; } vector1.setSafe(i, val + i); } - vector1.setValueCount(4096); + vector1.setValueCount(initialCapacity); /* No realloc should have happened in setSafe or * setValueCount */ - assertEquals(4096, vector1.getValueCapacity()); - assertEquals(4096, vector1.getValueCount()); + assertEquals(initialCapacity, vector1.getValueCapacity()); + assertEquals(initialCapacity, vector1.getValueCount()); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, - val + i, vector1.get(i)); + assertEquals("unexpected value at index: " + i, val + i, vector1.get(i)); } } /* set lesser initial capacity than actually needed * to trigger reallocs in copyFromSafe() */ - vector2.allocateNew(1024); - assertEquals(1024, vector2.getValueCapacity()); + vector2.allocateNew(initialCapacity / 4); + assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); + assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { vector2.copyFromSafe(i, i, vector1); } /* 2 realloc should have happened in copyFromSafe() */ - assertEquals(4096, vector2.getValueCapacity()); - vector2.setValueCount(8192); + assertTrue(vector2.getValueCapacity() >= initialCapacity); + vector2.setValueCount(initialCapacity * 2); /* setValueCount() should have done another realloc */ - assertEquals(8192, vector2.getValueCount()); - assertEquals(8192, vector2.getValueCapacity()); + assertEquals(initialCapacity * 2, vector2.getValueCount()); + assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); /* check vector data after copy and realloc */ - for (int i = 0; i < 8192; i++) { - if (((i & 1) == 0) || (i >= 4096)) { + for (int i = 0; i < initialCapacity * 2; i++) { + if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, - val + i, vector2.get(i)); + assertEquals("unexpected value at index: " + i, val + i, vector2.get(i)); } } } @@ -842,14 +876,15 @@ public void testCopyFromWithNulls11() { @Test /* TinyIntVector */ public void testCopyFromWithNulls12() { try (final TinyIntVector vector1 = new TinyIntVector(EMPTY_SCHEMA_PATH, allocator); - final TinyIntVector vector2 = new TinyIntVector(EMPTY_SCHEMA_PATH, allocator)) { + final TinyIntVector vector2 = new TinyIntVector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertEquals(4096, vector1.getValueCapacity()); + assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); assertEquals(0, vector1.getValueCount()); + int initialCapacity = vector1.getValueCapacity(); byte val = -128; - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { continue; } @@ -857,16 +892,16 @@ public void testCopyFromWithNulls12() { val++; } - vector1.setValueCount(4096); 
+ vector1.setValueCount(initialCapacity); /* No realloc should have happened in setSafe or * setValueCount */ - assertEquals(4096, vector1.getValueCapacity()); - assertEquals(4096, vector1.getValueCount()); + assertEquals(initialCapacity, vector1.getValueCapacity()); + assertEquals(initialCapacity, vector1.getValueCount()); val = -128; - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { @@ -878,24 +913,24 @@ public void testCopyFromWithNulls12() { /* set lesser initial capacity than actually needed * to trigger reallocs in copyFromSafe() */ - vector2.allocateNew(1024); - assertEquals(1024, vector2.getValueCapacity()); + vector2.allocateNew(initialCapacity / 4); + assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { vector2.copyFromSafe(i, i, vector1); } /* 2 realloc should have happened in copyFromSafe() */ - assertEquals(4096, vector2.getValueCapacity()); - vector2.setValueCount(8192); + assertTrue(vector2.getValueCapacity() >= initialCapacity); + vector2.setValueCount(initialCapacity * 2); /* setValueCount() should have done another realloc */ - assertEquals(8192, vector2.getValueCount()); - assertEquals(8192, vector2.getValueCapacity()); + assertEquals(initialCapacity * 2, vector2.getValueCount()); + assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); /* check vector data after copy and realloc */ val = -128; - for (int i = 0; i < 8192; i++) { - if (((i & 1) == 0) || (i >= 4096)) { + for (int i = 0; i < initialCapacity * 2; i++) { + if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { assertEquals("unexpected value at index: " + i, val, vector2.get(i)); @@ -908,32 +943,33 @@ public void testCopyFromWithNulls12() { @Test /* DecimalVector */ public void testCopyFromWithNulls13() { try (final DecimalVector vector1 = new DecimalVector(EMPTY_SCHEMA_PATH, allocator, 30, 16); - final DecimalVector vector2 = new DecimalVector(EMPTY_SCHEMA_PATH, allocator, 30, 16)) { + final DecimalVector vector2 = new DecimalVector(EMPTY_SCHEMA_PATH, allocator, 30, 16)) { vector1.allocateNew(); - assertEquals(4096, vector1.getValueCapacity()); + assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); assertEquals(0, vector1.getValueCount()); + int initialCapacity = vector1.getValueCapacity(); final double baseValue = 104567897654.876543654; - final BigDecimal[] decimals = new BigDecimal[4096]; - for (int i = 0; i < 4096; i++) { + final BigDecimal[] decimals = new BigDecimal[initialCapacity]; + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { continue; } - BigDecimal decimal = new BigDecimal(baseValue + (double)i); + BigDecimal decimal = new BigDecimal(baseValue + (double) i); vector1.setSafe(i, decimal); decimals[i] = decimal; } - vector1.setValueCount(4096); + vector1.setValueCount(initialCapacity); /* No realloc should have happened in setSafe or * setValueCount */ - assertEquals(4096, vector1.getValueCapacity()); - assertEquals(4096, vector1.getValueCount()); + assertEquals(initialCapacity, vector1.getValueCapacity()); + assertEquals(initialCapacity, vector1.getValueCount()); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { @@ -945,23 +981,24 @@ public void testCopyFromWithNulls13() { /* set lesser initial capacity than actually needed * to trigger reallocs in copyFromSafe() */ - vector2.allocateNew(1024); - 
assertEquals(1024, vector2.getValueCapacity()); + vector2.allocateNew(initialCapacity / 4); + assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); + assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { vector2.copyFromSafe(i, i, vector1); } /* 2 realloc should have happened in copyFromSafe() */ - assertEquals(4096, vector2.getValueCapacity()); - vector2.setValueCount(8192); + assertTrue(vector2.getValueCapacity() >= initialCapacity); + vector2.setValueCount(initialCapacity * 2); /* setValueCount() should have done another realloc */ - assertEquals(8192, vector2.getValueCount()); - assertEquals(8192, vector2.getValueCapacity()); + assertEquals(initialCapacity * 2, vector2.getValueCount()); + assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); /* check vector data after copy and realloc */ - for (int i = 0; i < 8192; i++) { - if (((i & 1) == 0) || (i >= 4096)) { + for (int i = 0; i < initialCapacity * 2; i++) { + if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { final BigDecimal decimal = vector2.getObject(i); @@ -974,61 +1011,61 @@ public void testCopyFromWithNulls13() { @Test /* TimeStampVector */ public void testCopyFromWithNulls14() { try (final TimeStampVector vector1 = new TimeStampMicroVector(EMPTY_SCHEMA_PATH, allocator); - final TimeStampVector vector2 = new TimeStampMicroVector(EMPTY_SCHEMA_PATH, allocator)) { + final TimeStampVector vector2 = new TimeStampMicroVector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertEquals(4096, vector1.getValueCapacity()); + assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); assertEquals(0, vector1.getValueCount()); + int initialCapacity = vector1.getValueCapacity(); final long val = 20145678912L; - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { continue; } - vector1.setSafe(i, val + (long)i); + vector1.setSafe(i, val + (long) i); } - vector1.setValueCount(4096); + vector1.setValueCount(initialCapacity); /* No realloc should have happened in setSafe or * setValueCount */ - assertEquals(4096, vector1.getValueCapacity()); - assertEquals(4096, vector1.getValueCount()); + assertEquals(initialCapacity, vector1.getValueCapacity()); + assertEquals(initialCapacity, vector1.getValueCount()); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, - val + (long)i, vector1.get(i)); + assertEquals("unexpected value at index: " + i, val + (long) i, vector1.get(i)); } } /* set lesser initial capacity than actually needed * to trigger reallocs in copyFromSafe() */ - vector2.allocateNew(1024); - assertEquals(1024, vector2.getValueCapacity()); + vector2.allocateNew(initialCapacity / 4); + assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); + assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { vector2.copyFromSafe(i, i, vector1); } /* 2 realloc should have happened in copyFromSafe() */ - assertEquals(4096, vector2.getValueCapacity()); - vector2.setValueCount(8192); + assertTrue(vector2.getValueCapacity() >= initialCapacity); + vector2.setValueCount(initialCapacity * 2); /* setValueCount() should have done another realloc */ - assertEquals(8192, vector2.getValueCount()); - assertEquals(8192, 
vector2.getValueCapacity()); + assertEquals(initialCapacity * 2, vector2.getValueCount()); + assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); /* check vector data after copy and realloc */ - for (int i = 0; i < 8192; i++) { - if (((i & 1) == 0) || (i >= 4096)) { + for (int i = 0; i < initialCapacity * 2; i++) { + if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, - val + (long) i, vector2.get(i)); + assertEquals("unexpected value at index: " + i, val + (long) i, vector2.get(i)); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index 4e8d8f0f39944..68102b1c32a46 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -774,13 +774,13 @@ public void testSetInitialCapacity() { vector.setInitialCapacity(512); vector.allocateNew(); assertEquals(512, vector.getValueCapacity()); - assertEquals(4096, vector.getDataVector().getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 512 * 5); /* use density as 4 */ vector.setInitialCapacity(512, 4); vector.allocateNew(); assertEquals(512, vector.getValueCapacity()); - assertEquals(512 * 4, vector.getDataVector().getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 512 * 4); /** * inner value capacity we pass to data vector is 512 * 0.1 => 51 @@ -793,7 +793,7 @@ public void testSetInitialCapacity() { vector.setInitialCapacity(512, 0.1); vector.allocateNew(); assertEquals(512, vector.getValueCapacity()); - assertEquals(64, vector.getDataVector().getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 51); /** * inner value capacity we pass to data vector is 512 * 0.01 => 5 @@ -806,7 +806,7 @@ public void testSetInitialCapacity() { vector.setInitialCapacity(512, 0.01); vector.allocateNew(); assertEquals(512, vector.getValueCapacity()); - assertEquals(8, vector.getDataVector().getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 5); /** * inner value capacity we pass to data vector is 5 * 0.1 => 0 @@ -822,7 +822,7 @@ public void testSetInitialCapacity() { vector.setInitialCapacity(5, 0.1); vector.allocateNew(); assertEquals(7, vector.getValueCapacity()); - assertEquals(1, vector.getDataVector().getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 1); } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index 4772a86356b95..30fe23cae4afd 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -32,6 +32,7 @@ import java.util.Arrays; import java.util.List; +import org.apache.arrow.memory.BaseAllocator; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; @@ -68,8 +69,8 @@ public void init() { private static final byte[] STR5 = "EEE5".getBytes(utf8Charset); private static final byte[] STR6 = "FFFFF6".getBytes(utf8Charset); private static final int MAX_VALUE_COUNT = - Integer.getInteger("arrow.vector.max_allocation_bytes", Integer.MAX_VALUE) / 4; - private static final int MAX_VALUE_COUNT_8BYTE = MAX_VALUE_COUNT 
/ 2; + (int)(Integer.getInteger("arrow.vector.max_allocation_bytes", Integer.MAX_VALUE) / 7); + private static final int MAX_VALUE_COUNT_8BYTE = (int)(MAX_VALUE_COUNT / 2); @After public void terminate() throws Exception { @@ -108,7 +109,7 @@ public void testFixedType1() { vector.allocateNew(1024); initialCapacity = vector.getValueCapacity(); - assertEquals(1024, initialCapacity); + assertTrue(initialCapacity >= 1024); // Put and set a few values vector.setSafe(0, 100); @@ -124,7 +125,7 @@ public void testFixedType1() { assertEquals(104, vector.get(1023)); try { - vector.set(1024, 10000); + vector.set(initialCapacity, 10000); } catch (IndexOutOfBoundsException ie) { error = true; } finally { @@ -133,7 +134,7 @@ public void testFixedType1() { } try { - vector.get(1024); + vector.get(initialCapacity); } catch (IndexOutOfBoundsException ie) { error = true; } finally { @@ -142,10 +143,10 @@ public void testFixedType1() { } /* this should trigger a realloc() */ - vector.setSafe(1024, 10000); + vector.setSafe(initialCapacity, 10000); /* underlying buffer should now be able to store double the number of values */ - assertEquals(initialCapacity * 2, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= 2 * initialCapacity); /* check vector data after realloc */ assertEquals(100, vector.get(0)); @@ -153,16 +154,17 @@ public void testFixedType1() { assertEquals(102, vector.get(100)); assertEquals(103, vector.get(1022)); assertEquals(104, vector.get(1023)); - assertEquals(10000, vector.get(1024)); + assertEquals(10000, vector.get(initialCapacity)); /* reset the vector */ + int capacityBeforeReset = vector.getValueCapacity(); vector.reset(); /* capacity shouldn't change after reset */ - assertEquals(initialCapacity * 2, vector.getValueCapacity()); + assertEquals(capacityBeforeReset, vector.getValueCapacity()); /* vector data should have been zeroed out */ - for (int i = 0; i < (initialCapacity * 2); i++) { + for (int i = 0; i < capacityBeforeReset; i++) { // TODO: test vector.get(i) is 0 after unsafe get added assertEquals("non-zero data not expected at index: " + i, true, vector.isNull(i)); } @@ -180,7 +182,7 @@ public void testFixedType2() { intVector.setInitialCapacity(MAX_VALUE_COUNT); try { - intVector.setInitialCapacity(MAX_VALUE_COUNT + 1); + intVector.setInitialCapacity(MAX_VALUE_COUNT * 2); } catch (OversizedAllocationException oe) { error = true; } finally { @@ -195,17 +197,18 @@ public void testFixedType2() { /* allocate 64 bytes (16 * 4) */ intVector.allocateNew(); /* underlying buffer should be able to store 16 values */ - assertEquals(initialCapacity, intVector.getValueCapacity()); + assertTrue(intVector.getValueCapacity() >= initialCapacity); + initialCapacity = intVector.getValueCapacity(); /* populate the vector */ int j = 1; - for (int i = 0; i < 16; i += 2) { + for (int i = 0; i < initialCapacity; i += 2) { intVector.set(i, j); j++; } try { - intVector.set(16, 9); + intVector.set(initialCapacity, j); } catch (IndexOutOfBoundsException ie) { error = true; } finally { @@ -215,13 +218,13 @@ public void testFixedType2() { /* check vector contents */ j = 1; - for (int i = 0; i < 16; i += 2) { + for (int i = 0; i < initialCapacity; i += 2) { assertEquals("unexpected value at index: " + i, j, intVector.get(i)); j++; } try { - intVector.get(16); + intVector.get(initialCapacity); } catch (IndexOutOfBoundsException ie) { error = true; } finally { @@ -230,26 +233,27 @@ public void testFixedType2() { } /* this should trigger a realloc() */ - intVector.setSafe(16, 9); + 
intVector.setSafe(initialCapacity, j); /* underlying buffer should now be able to store double the number of values */ - assertEquals(initialCapacity * 2, intVector.getValueCapacity()); + assertTrue(intVector.getValueCapacity() >= initialCapacity * 2); /* vector data should still be intact after realloc */ j = 1; - for (int i = 0; i <= 16; i += 2) { + for (int i = 0; i <= initialCapacity; i += 2) { assertEquals("unexpected value at index: " + i, j, intVector.get(i)); j++; } /* reset the vector */ + int capacityBeforeReset = intVector.getValueCapacity(); intVector.reset(); /* capacity shouldn't change after reset */ - assertEquals(initialCapacity * 2, intVector.getValueCapacity()); + assertEquals(capacityBeforeReset, intVector.getValueCapacity()); /* vector data should have been zeroed out */ - for (int i = 0; i < (initialCapacity * 2); i++) { + for (int i = 0; i < capacityBeforeReset; i++) { assertEquals("non-zero data not expected at index: " + i, true, intVector.isNull(i)); } } @@ -266,7 +270,7 @@ public void testFixedType3() { floatVector.setInitialCapacity(MAX_VALUE_COUNT); try { - floatVector.setInitialCapacity(MAX_VALUE_COUNT + 1); + floatVector.setInitialCapacity(MAX_VALUE_COUNT * 2); } catch (OversizedAllocationException oe) { error = true; } finally { @@ -281,7 +285,8 @@ public void testFixedType3() { /* allocate 64 bytes (16 * 4) */ floatVector.allocateNew(); /* underlying buffer should be able to store 16 values */ - assertEquals(initialCapacity, floatVector.getValueCapacity()); + assertTrue(floatVector.getValueCapacity() >= initialCapacity); + initialCapacity = floatVector.getValueCapacity(); floatVector.zeroVector(); @@ -296,7 +301,7 @@ public void testFixedType3() { floatVector.set(14, 8.5f); try { - floatVector.set(16, 9.5f); + floatVector.set(initialCapacity, 9.5f); } catch (IndexOutOfBoundsException ie) { error = true; } finally { @@ -315,7 +320,7 @@ assertEquals(8.5f, floatVector.get(14), 0); try { - floatVector.get(16); + floatVector.get(initialCapacity); } catch (IndexOutOfBoundsException ie) { error = true; } finally { @@ -324,10 +329,10 @@ } /* this should trigger a realloc() */ - floatVector.setSafe(16, 9.5f); + floatVector.setSafe(initialCapacity, 9.5f); /* underlying buffer should now be able to store double the number of values */ - assertEquals(initialCapacity * 2, floatVector.getValueCapacity()); + assertTrue(floatVector.getValueCapacity() >= initialCapacity * 2); /* vector data should still be intact after realloc */ assertEquals(1.5f, floatVector.get(0), 0); @@ -338,16 +343,17 @@ assertEquals(6.6f, floatVector.get(10), 0); assertEquals(7.8f, floatVector.get(12), 0); assertEquals(8.5f, floatVector.get(14), 0); - assertEquals(9.5f, floatVector.get(16), 0); + assertEquals(9.5f, floatVector.get(initialCapacity), 0); /* reset the vector */ + int capacityBeforeReset = floatVector.getValueCapacity(); floatVector.reset(); /* capacity shouldn't change after reset */ - assertEquals(initialCapacity * 2, floatVector.getValueCapacity()); + assertEquals(capacityBeforeReset, floatVector.getValueCapacity()); /* vector data should be zeroed out */ - for (int i = 0; i < (initialCapacity * 2); i++) { + for (int i = 0; i < capacityBeforeReset; i++) { assertEquals("non-zero data not expected at index: " + i, true, floatVector.isNull(i)); } } @@ -364,7 +370,7 @@ public void testFixedType4() { floatVector.setInitialCapacity(MAX_VALUE_COUNT_8BYTE); try { - 
floatVector.setInitialCapacity(MAX_VALUE_COUNT_8BYTE + 1); + floatVector.setInitialCapacity(MAX_VALUE_COUNT_8BYTE * 2); } catch (OversizedAllocationException oe) { error = true; } finally { @@ -379,7 +385,8 @@ public void testFixedType4() { /* allocate 128 bytes (16 * 8) */ floatVector.allocateNew(); /* underlying buffer should be able to store 16 values */ - assertEquals(initialCapacity, floatVector.getValueCapacity()); + assertTrue(floatVector.getValueCapacity() >= initialCapacity); + initialCapacity = floatVector.getValueCapacity(); /* populate the vector */ floatVector.set(0, 1.55); @@ -392,7 +399,7 @@ public void testFixedType4() { floatVector.set(14, 8.56); try { - floatVector.set(16, 9.53); + floatVector.set(initialCapacity, 9.53); } catch (IndexOutOfBoundsException ie) { error = true; } finally { @@ -411,7 +418,7 @@ public void testFixedType4() { assertEquals(8.56, floatVector.get(14), 0); try { - floatVector.get(16); + floatVector.get(initialCapacity); } catch (IndexOutOfBoundsException ie) { error = true; } finally { @@ -420,10 +427,10 @@ public void testFixedType4() { } /* this should trigger a realloc() */ - floatVector.setSafe(16, 9.53); + floatVector.setSafe(initialCapacity, 9.53); /* underlying buffer should now be able to store double the number of values */ - assertEquals(initialCapacity * 2, floatVector.getValueCapacity()); + assertTrue(floatVector.getValueCapacity() >= initialCapacity * 2); /* vector data should still be intact after realloc */ assertEquals(1.55, floatVector.get(0), 0); @@ -434,16 +441,17 @@ public void testFixedType4() { assertEquals(6.67, floatVector.get(10), 0); assertEquals(7.87, floatVector.get(12), 0); assertEquals(8.56, floatVector.get(14), 0); - assertEquals(9.53, floatVector.get(16), 0); + assertEquals(9.53, floatVector.get(initialCapacity), 0); /* reset the vector */ + int capacityBeforeReset = floatVector.getValueCapacity(); floatVector.reset(); /* capacity shouldn't change after reset */ - assertEquals(initialCapacity * 2, floatVector.getValueCapacity()); + assertEquals(capacityBeforeReset, floatVector.getValueCapacity()); /* vector data should be zeroed out */ - for (int i = 0; i < (initialCapacity * 2); i++) { + for (int i = 0; i < capacityBeforeReset; i++) { assertEquals("non-zero data not expected at index: " + i, true, floatVector.isNull(i)); } } @@ -463,36 +471,37 @@ public void testNullableFixedType1() { assertEquals(0, vector.getValueCapacity()); vector.allocateNew(); - assertEquals(initialCapacity, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= initialCapacity); + initialCapacity = vector.getValueCapacity(); // Put and set a few values vector.set(0, 100); vector.set(1, 101); vector.set(100, 102); - vector.set(1022, 103); - vector.set(1023, 104); + vector.set(initialCapacity - 2, 103); + vector.set(initialCapacity - 1, 104); /* check vector contents */ assertEquals(100, vector.get(0)); assertEquals(101, vector.get(1)); assertEquals(102, vector.get(100)); - assertEquals(103, vector.get(1022)); - assertEquals(104, vector.get(1023)); + assertEquals(103, vector.get(initialCapacity - 2)); + assertEquals(104, vector.get(initialCapacity - 1)); int val = 0; /* check unset bits/null values */ - for (int i = 2, j = 101; i <= 99 || j <= 1021; i++, j++) { + for (int i = 2, j = 101; i <= 99 || j <= initialCapacity - 3; i++, j++) { if (i <= 99) { assertTrue(vector.isNull(i)); } - if (j <= 1021) { + if (j <= initialCapacity - 3) { assertTrue(vector.isNull(j)); } } try { - vector.set(1024, 10000); + vector.set(initialCapacity, 
10000); } catch (IndexOutOfBoundsException ie) { error = true; } finally { @@ -501,7 +510,7 @@ public void testNullableFixedType1() { } try { - vector.get(1024); + vector.get(initialCapacity); } catch (IndexOutOfBoundsException ie) { error = true; } finally { @@ -510,39 +519,40 @@ public void testNullableFixedType1() { } /* should trigger a realloc of the underlying bitvector and valuevector */ - vector.setSafe(1024, 10000); + vector.setSafe(initialCapacity, 10000); /* check new capacity */ - assertEquals(initialCapacity * 2, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= initialCapacity * 2); /* vector contents should still be intact after realloc */ assertEquals(100, vector.get(0)); assertEquals(101, vector.get(1)); assertEquals(102, vector.get(100)); - assertEquals(103, vector.get(1022)); - assertEquals(104, vector.get(1023)); - assertEquals(10000, vector.get(1024)); + assertEquals(103, vector.get(initialCapacity - 2)); + assertEquals(104, vector.get(initialCapacity - 1)); + assertEquals(10000, vector.get(initialCapacity)); val = 0; /* check unset bits/null values */ - for (int i = 2, j = 101; i < 99 || j < 1021; i++, j++) { + for (int i = 2, j = 101; i < 99 || j < initialCapacity - 3; i++, j++) { if (i <= 99) { assertTrue(vector.isNull(i)); } - if (j <= 1021) { + if (j <= initialCapacity - 3) { assertTrue(vector.isNull(j)); } } /* reset the vector */ + int capacityBeforeReset = vector.getValueCapacity(); vector.reset(); /* capacity shouldn't change after reset */ - assertEquals(initialCapacity * 2, vector.getValueCapacity()); + assertEquals(capacityBeforeReset, vector.getValueCapacity()); /* vector data should be zeroed out */ - for (int i = 0; i < (initialCapacity * 2); i++) { + for (int i = 0; i < capacityBeforeReset; i++) { assertTrue("non-null data not expected at index: " + i, vector.isNull(i)); } } @@ -560,7 +570,8 @@ public void testNullableFixedType2() { assertEquals(0, vector.getValueCapacity()); vector.allocateNew(); - assertEquals(initialCapacity, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= initialCapacity); + initialCapacity = vector.getValueCapacity(); /* populate the vector */ vector.set(0, 100.5f); @@ -573,7 +584,7 @@ public void testNullableFixedType2() { vector.set(14, 89.5f); try { - vector.set(16, 90.5f); + vector.set(initialCapacity, 90.5f); } catch (IndexOutOfBoundsException ie) { error = true; } finally { @@ -600,7 +611,7 @@ public void testNullableFixedType2() { assertTrue(vector.isNull(15)); try { - vector.get(16); + vector.get(initialCapacity); } catch (IndexOutOfBoundsException ie) { error = true; } finally { @@ -609,10 +620,10 @@ public void testNullableFixedType2() { } /* this should trigger a realloc() */ - vector.setSafe(16, 90.5f); + vector.setSafe(initialCapacity, 90.5f); /* underlying buffer should now be able to store double the number of values */ - assertEquals(initialCapacity * 2, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= 2 * initialCapacity); /* vector data should still be intact after realloc */ assertEquals(100.5f, vector.get(0), 0); @@ -633,13 +644,14 @@ public void testNullableFixedType2() { assertTrue(vector.isNull(15)); /* reset the vector */ + int capacityBeforeReset = vector.getValueCapacity(); vector.reset(); /* capacity shouldn't change after reset */ - assertEquals(initialCapacity * 2, vector.getValueCapacity()); + assertEquals(capacityBeforeReset, vector.getValueCapacity()); /* vector data should be zeroed out */ - for (int i = 0; i < (initialCapacity * 2); 
i++) { + for (int i = 0; i < capacityBeforeReset; i++) { assertTrue("non-null data not expected at index: " + i, vector.isNull(i)); } } @@ -656,8 +668,9 @@ public void testNullableFixedType3() { assertEquals(0, vector.getValueCapacity()); /* allocate space for 4KB data (1024 * 4) */ vector.allocateNew(initialCapacity); - /* underlying buffer should be able to store 16 values */ - assertEquals(initialCapacity, vector.getValueCapacity()); + /* underlying buffer should be able to store 1024 values */ + assertTrue(vector.getValueCapacity() >= initialCapacity); + initialCapacity = vector.getValueCapacity(); vector.set(0, 1); vector.set(1, 2); @@ -687,7 +700,7 @@ public void testNullableFixedType3() { ArrowBuf validityVectorBuf = buffers.get(0); /* bitvector tracks 1024 integers --> 1024 bits --> 128 bytes */ - assertEquals(128, validityVectorBuf.readableBytes()); + assertTrue(validityVectorBuf.readableBytes() >= 128); assertEquals(3, validityVectorBuf.getByte(0)); // 1st and second bit defined for (int i = 1; i < 12; i++) { assertEquals(0, validityVectorBuf.getByte(i)); // nothing defined until 100 @@ -699,15 +712,15 @@ public void testNullableFixedType3() { assertEquals(-64, validityVectorBuf.getByte(127)); // 1022nd and 1023rd bit defined /* this should trigger a realloc() */ - vector.setSafe(1024, 6); + vector.setSafe(initialCapacity, 6); /* underlying buffer should now be able to store double the number of values */ - assertEquals(initialCapacity * 2, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= 2 * initialCapacity); /* vector data should still be intact after realloc */ j = 1; for (int i = 0; i < (initialCapacity * 2); i++) { - if ((i > 1024) || (i >= 2 && i <= 99) || (i >= 101 && i <= 1021)) { + if ((i > 1023 && i != initialCapacity) || (i >= 2 && i <= 99) || (i >= 101 && i <= 1021)) { assertTrue("non-null data not expected at index: " + i, vector.isNull(i)); } else { assertFalse("null data not expected at index: " + i, vector.isNull(i)); @@ -717,19 +730,20 @@ public void testNullableFixedType3() { } /* reset the vector */ + int capacityBeforeReset = vector.getValueCapacity(); vector.reset(); /* capacity shouldn't change after reset */ - assertEquals(initialCapacity * 2, vector.getValueCapacity()); + assertEquals(capacityBeforeReset, vector.getValueCapacity()); /* vector data should have been zeroed out */ - for (int i = 0; i < (initialCapacity * 2); i++) { + for (int i = 0; i < capacityBeforeReset; i++) { assertTrue("non-null data not expected at index: " + i, vector.isNull(i)); } - vector.allocateNew(4096); + vector.allocateNew(initialCapacity * 4); // vector has been erased - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity * 4; i++) { assertTrue("non-null data not expected at index: " + i, vector.isNull(i)); } } @@ -764,7 +778,7 @@ public void testNullableFixedType4() { } vector.setSafe(valueCapacity, 20000000); - assertEquals(valueCapacity * 2, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= valueCapacity * 2); for (int i = 0; i < vector.getValueCapacity(); i++) { if (i == valueCapacity) { @@ -795,14 +809,15 @@ public void testNullableFixedType4() { } } - vector.setSafe((valueCapacity * 2) + 1000, 400000000); - assertEquals(valueCapacity * 4, vector.getValueCapacity()); + int valueCapacityBeforeRealloc = vector.getValueCapacity(); + vector.setSafe(valueCapacityBeforeRealloc + 1000, 400000000); + assertTrue(vector.getValueCapacity() >= valueCapacity * 4); for (int i = 0; i < vector.getValueCapacity(); i++) { - if (i 
== (valueCapacity * 2 + 1000)) { + if (i == (valueCapacityBeforeRealloc + 1000)) { assertFalse("unexpected null value at index: " + i, vector.isNull(i)); assertEquals("unexpected value at index: " + i, 400000000, vector.get(i)); - } else if (i < valueCapacity * 2 && (i % 2) == 0) { + } else if (i < valueCapacityBeforeRealloc && (i % 2) == 0) { assertFalse("unexpected null value at index: " + i, vector.isNull(i)); assertEquals("unexpected value at index: " + i, baseValue + i, vector.get(i)); } else { @@ -811,13 +826,14 @@ public void testNullableFixedType4() { } /* reset the vector */ + int valueCapacityBeforeReset = vector.getValueCapacity(); vector.reset(); /* capacity shouldn't change after reset */ - assertEquals(valueCapacity * 4, vector.getValueCapacity()); + assertEquals(valueCapacityBeforeReset, vector.getValueCapacity()); /* vector data should be zeroed out */ - for (int i = 0; i < (valueCapacity * 4); i++) { + for (int i = 0; i < valueCapacityBeforeReset; i++) { assertTrue("non-null data not expected at index: " + i, vector.isNull(i)); } } @@ -936,52 +952,56 @@ public void testNullableVarType2() { @Test /* Float8Vector */ public void testReallocAfterVectorTransfer1() { try (final Float8Vector vector = new Float8Vector(EMPTY_SCHEMA_PATH, allocator)) { - final int initialDefaultCapacity = 4096; + int initialCapacity = 4096; boolean error = false; /* use the default capacity; 4096*8 => 32KB */ + vector.setInitialCapacity(initialCapacity); vector.allocateNew(); - assertEquals(initialDefaultCapacity, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= initialCapacity); + initialCapacity = vector.getValueCapacity(); double baseValue = 100.375; - for (int i = 0; i < initialDefaultCapacity; i++) { + for (int i = 0; i < initialCapacity; i++) { vector.setSafe(i, baseValue + (double)i); } /* the above setSafe calls should not have triggered a realloc as * we are within the capacity. 
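* (allocations round up to the next power of two, so getValueCapacity() can exceed the amount requested; hence the assertions compare against the captured initialCapacity rather than a constant.)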
check the vector contents */ - assertEquals(initialDefaultCapacity, vector.getValueCapacity()); + assertEquals(initialCapacity, vector.getValueCapacity()); - for (int i = 0; i < initialDefaultCapacity; i++) { + for (int i = 0; i < initialCapacity; i++) { double value = vector.get(i); assertEquals(baseValue + (double)i, value, 0); } /* this should trigger a realloc */ - vector.setSafe(initialDefaultCapacity, baseValue + (double)initialDefaultCapacity); - assertEquals(initialDefaultCapacity * 2, vector.getValueCapacity()); + vector.setSafe(initialCapacity, baseValue + (double)initialCapacity); + assertTrue(vector.getValueCapacity() >= initialCapacity * 2); + int capacityAfterRealloc1 = vector.getValueCapacity(); - for (int i = initialDefaultCapacity + 1; i < (initialDefaultCapacity * 2); i++) { + for (int i = initialCapacity + 1; i < capacityAfterRealloc1; i++) { vector.setSafe(i, baseValue + (double)i); } - for (int i = 0; i < (initialDefaultCapacity * 2); i++) { + for (int i = 0; i < capacityAfterRealloc1; i++) { double value = vector.get(i); assertEquals(baseValue + (double)i, value, 0); } /* this should trigger a realloc */ - vector.setSafe(initialDefaultCapacity * 2, baseValue + (double)(initialDefaultCapacity * 2)); - assertEquals(initialDefaultCapacity * 4, vector.getValueCapacity()); + vector.setSafe(capacityAfterRealloc1, baseValue + (double)(capacityAfterRealloc1)); + assertTrue(vector.getValueCapacity() >= initialCapacity * 4); + int capacityAfterRealloc2 = vector.getValueCapacity(); - for (int i = (initialDefaultCapacity * 2) + 1; i < (initialDefaultCapacity * 4); i++) { + for (int i = capacityAfterRealloc1 + 1; i < capacityAfterRealloc2; i++) { vector.setSafe(i, baseValue + (double)i); } - for (int i = 0; i < (initialDefaultCapacity * 4); i++) { + for (int i = 0; i < capacityAfterRealloc2; i++) { double value = vector.get(i); assertEquals(baseValue + (double)i, value, 0); } @@ -997,10 +1017,10 @@ public void testReallocAfterVectorTransfer1() { /* now let's realloc the toVector */ toVector.reAlloc(); - assertEquals(initialDefaultCapacity * 8, toVector.getValueCapacity()); + assertTrue(toVector.getValueCapacity() >= initialCapacity * 8); - for (int i = 0; i < (initialDefaultCapacity * 8); i++) { - if (i < (initialDefaultCapacity * 4)) { + for (int i = 0; i < toVector.getValueCapacity(); i++) { + if (i < capacityAfterRealloc2) { assertEquals(baseValue + (double)i, toVector.get(i), 0); } else { assertTrue(toVector.isNull(i)); @@ -1014,51 +1034,53 @@ public void testReallocAfterVectorTransfer1() { @Test /* Float8Vector */ public void testReallocAfterVectorTransfer2() { try (final Float8Vector vector = new Float8Vector(EMPTY_SCHEMA_PATH, allocator)) { - final int initialDefaultCapacity = 4096; + int initialCapacity = 4096; boolean error = false; - vector.allocateNew(initialDefaultCapacity); - - assertEquals(initialDefaultCapacity, vector.getValueCapacity()); + vector.allocateNew(initialCapacity); + assertTrue(vector.getValueCapacity() >= initialCapacity); + initialCapacity = vector.getValueCapacity(); double baseValue = 100.375; - for (int i = 0; i < initialDefaultCapacity; i++) { + for (int i = 0; i < initialCapacity; i++) { vector.setSafe(i, baseValue + (double)i); } /* the above setSafe calls should not have triggered a realloc as * we are within the capacity. 
check the vector contents */ - assertEquals(initialDefaultCapacity, vector.getValueCapacity()); + assertEquals(initialCapacity, vector.getValueCapacity()); - for (int i = 0; i < initialDefaultCapacity; i++) { + for (int i = 0; i < initialCapacity; i++) { double value = vector.get(i); assertEquals(baseValue + (double)i, value, 0); } /* this should trigger a realloc */ - vector.setSafe(initialDefaultCapacity, baseValue + (double)initialDefaultCapacity); - assertEquals(initialDefaultCapacity * 2, vector.getValueCapacity()); + vector.setSafe(initialCapacity, baseValue + (double)initialCapacity); + assertTrue(vector.getValueCapacity() >= initialCapacity * 2); + int capacityAfterRealloc1 = vector.getValueCapacity(); - for (int i = initialDefaultCapacity + 1; i < (initialDefaultCapacity * 2); i++) { + for (int i = initialCapacity + 1; i < capacityAfterRealloc1; i++) { vector.setSafe(i, baseValue + (double)i); } - for (int i = 0; i < (initialDefaultCapacity * 2); i++) { + for (int i = 0; i < capacityAfterRealloc1; i++) { double value = vector.get(i); assertEquals(baseValue + (double)i, value, 0); } /* this should trigger a realloc */ - vector.setSafe(initialDefaultCapacity * 2, baseValue + (double)(initialDefaultCapacity * 2)); - assertEquals(initialDefaultCapacity * 4, vector.getValueCapacity()); + vector.setSafe(capacityAfterRealloc1, baseValue + (double)(capacityAfterRealloc1)); + assertTrue(vector.getValueCapacity() >= initialCapacity * 4); + int capacityAfterRealloc2 = vector.getValueCapacity(); - for (int i = (initialDefaultCapacity * 2) + 1; i < (initialDefaultCapacity * 4); i++) { + for (int i = capacityAfterRealloc1 + 1; i < capacityAfterRealloc2; i++) { vector.setSafe(i, baseValue + (double)i); } - for (int i = 0; i < (initialDefaultCapacity * 4); i++) { + for (int i = 0; i < capacityAfterRealloc2; i++) { double value = vector.get(i); assertEquals(baseValue + (double)i, value, 0); } @@ -1073,7 +1095,7 @@ public void testReallocAfterVectorTransfer2() { Float8Vector toVector = (Float8Vector)transferPair.getTo(); /* check toVector contents before realloc */ - for (int i = 0; i < (initialDefaultCapacity * 4); i++) { + for (int i = 0; i < toVector.getValueCapacity(); i++) { assertFalse("unexpected null value at index: " + i, toVector.isNull(i)); double value = toVector.get(i); assertEquals("unexpected value at index: " + i, baseValue + (double)i, value, 0); @@ -1081,10 +1103,10 @@ public void testReallocAfterVectorTransfer2() { /* now let's realloc the toVector and check contents again */ toVector.reAlloc(); - assertEquals(initialDefaultCapacity * 8, toVector.getValueCapacity()); + assertTrue(toVector.getValueCapacity() >= initialCapacity * 8); - for (int i = 0; i < (initialDefaultCapacity * 8); i++) { - if (i < (initialDefaultCapacity * 4)) { + for (int i = 0; i < toVector.getValueCapacity(); i++) { + if (i < capacityAfterRealloc2) { assertFalse("unexpected null value at index: " + i, toVector.isNull(i)); double value = toVector.get(i); assertEquals("unexpected value at index: " + i, baseValue + (double)i, value, 0); @@ -1103,7 +1125,7 @@ public void testReallocAfterVectorTransfer3() { /* 4096 values with 10 byte per record */ vector.allocateNew(4096 * 10, 4096); int valueCapacity = vector.getValueCapacity(); - assertEquals(4096, valueCapacity); + assertTrue(valueCapacity >= 4096); /* populate the vector */ for (int i = 0; i < valueCapacity; i++) { @@ -1125,7 +1147,10 @@ public void testReallocAfterVectorTransfer3() { /* trigger first realloc */ vector.setSafe(valueCapacity, STR2, 0, 
STR2.length); - assertEquals(valueCapacity * 2, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= 2 * valueCapacity); + while (vector.getByteCapacity() < 10 * vector.getValueCapacity()) { + vector.reallocDataBuffer(); + } /* populate the remaining vector */ for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { @@ -1148,7 +1173,10 @@ public void testReallocAfterVectorTransfer3() { /* trigger second realloc */ vector.setSafe(valueCapacity + 10, STR2, 0, STR2.length); - assertEquals(valueCapacity * 2, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= 2 * valueCapacity); + while (vector.getByteCapacity() < 10 * vector.getValueCapacity()) { + vector.reallocDataBuffer(); + } /* populate the remaining vector */ for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { @@ -1197,7 +1225,7 @@ public void testReallocAfterVectorTransfer4() { /* 4096 values */ vector.allocateNew(4096); int valueCapacity = vector.getValueCapacity(); - assertEquals(4096, valueCapacity); + assertTrue(valueCapacity >= 4096); /* populate the vector */ int baseValue = 1000; @@ -1218,7 +1246,7 @@ public void testReallocAfterVectorTransfer4() { /* trigger first realloc */ vector.setSafe(valueCapacity, 10000000); - assertEquals(valueCapacity * 2, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= valueCapacity * 2); /* populate the remaining vector */ for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { @@ -1239,7 +1267,7 @@ public void testReallocAfterVectorTransfer4() { /* trigger second realloc */ vector.setSafe(valueCapacity, 10000000); - assertEquals(valueCapacity * 2, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= valueCapacity * 2); /* populate the remaining vector */ for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { @@ -1288,7 +1316,8 @@ public void testReAllocFixedWidthVector() { try (final Float4Vector vector = newVector(Float4Vector.class, EMPTY_SCHEMA_PATH, MinorType.FLOAT4, allocator)) { vector.allocateNew(1024); - assertEquals(1024, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= 1024); + int initialCapacity = vector.getValueCapacity(); // Put values in indexes that fall within the initial allocation vector.setSafe(0, 100.1f); @@ -1299,7 +1328,7 @@ public void testReAllocFixedWidthVector() { vector.setSafe(2000, 105.5f); // Check valueCapacity is more than initial allocation - assertEquals(1024 * 2, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= 2 * initialCapacity); assertEquals(100.1f, vector.get(0), 0); assertEquals(102.3f, vector.get(100), 0); @@ -1316,24 +1345,24 @@ public void testReAllocFixedWidthVector() { @Test public void testReAllocVariableWidthVector() { try (final VarCharVector vector = newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) { + vector.setInitialCapacity(4095); vector.allocateNew(); int initialCapacity = vector.getValueCapacity(); - assertEquals(4095, initialCapacity); + assertTrue(initialCapacity >= 4095); /* Put values in indexes that fall within the initial allocation */ vector.setSafe(0, STR1, 0, STR1.length); vector.setSafe(initialCapacity - 1, STR2, 0, STR2.length); /* the above set calls should NOT have triggered a realloc */ - initialCapacity = vector.getValueCapacity(); - assertEquals(4095, initialCapacity); + assertEquals(initialCapacity, vector.getValueCapacity()); /* Now try to put values in space that falls beyond the initial allocation */ vector.setSafe(initialCapacity + 
200, STR3, 0, STR3.length); /* Check valueCapacity is more than initial allocation */ - assertEquals(((initialCapacity + 1) * 2) - 1, vector.getValueCapacity()); + assertTrue(initialCapacity * 2 <= vector.getValueCapacity()); assertArrayEquals(STR1, vector.get(0)); assertArrayEquals(STR2, vector.get(initialCapacity - 1)); @@ -1348,20 +1377,20 @@ public void testReAllocVariableWidthVector() { @Test public void testFillEmptiesNotOverfill() { try (final VarCharVector vector = newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) { + vector.setInitialCapacity(4095); vector.allocateNew(); int initialCapacity = vector.getValueCapacity(); - assertEquals(4095, initialCapacity); + assertTrue(initialCapacity >= 4095); vector.setSafe(4094, "hello".getBytes(), 0, 5); /* the above set method should NOT have trigerred a realloc */ - initialCapacity = vector.getValueCapacity(); - assertEquals(4095, initialCapacity); + assertEquals(initialCapacity, vector.getValueCapacity()); - vector.setValueCount(4095); - assertEquals(4096 * vector.OFFSET_WIDTH, vector.getFieldBuffers().get(1).capacity()); - initialCapacity = vector.getValueCapacity(); - assertEquals(4095, initialCapacity); + int bufSizeBefore = vector.getFieldBuffers().get(1).capacity(); + vector.setValueCount(initialCapacity); + assertEquals(bufSizeBefore, vector.getFieldBuffers().get(1).capacity()); + assertEquals(initialCapacity, vector.getValueCapacity()); } } @@ -1371,11 +1400,12 @@ public void testCopyFromWithNulls() { final VarCharVector vector2 = newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) { + vector.setInitialCapacity(4095); vector.allocateNew(); int capacity = vector.getValueCapacity(); - assertEquals(4095, capacity); + assertTrue(capacity >= 4095); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < capacity; i++) { if (i % 3 == 0) { continue; } @@ -1384,12 +1414,11 @@ public void testCopyFromWithNulls() { } /* NO reAlloc() should have happened in setSafe() */ - capacity = vector.getValueCapacity(); - assertEquals(4095, capacity); + assertEquals(capacity, vector.getValueCapacity()); - vector.setValueCount(4095); + vector.setValueCount(capacity); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < capacity; i++) { if (i % 3 == 0) { assertNull(vector.getObject(i)); } else { @@ -1397,11 +1426,12 @@ public void testCopyFromWithNulls() { } } + vector2.setInitialCapacity(4095); vector2.allocateNew(); - capacity = vector2.getValueCapacity(); - assertEquals(4095, capacity); + int capacity2 = vector2.getValueCapacity(); + assertEquals(capacity2, capacity); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < capacity; i++) { vector2.copyFromSafe(i, i, vector); if (i % 3 == 0) { assertNull(vector2.getObject(i)); @@ -1411,12 +1441,11 @@ public void testCopyFromWithNulls() { } /* NO reAlloc() should have happened in copyFrom */ - capacity = vector2.getValueCapacity(); - assertEquals(4095, capacity); + assertEquals(capacity, vector2.getValueCapacity()); - vector2.setValueCount(4095); + vector2.setValueCount(capacity); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < capacity; i++) { if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { @@ -1432,11 +1461,12 @@ public void testCopyFromWithNulls1() { final VarCharVector vector2 = newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) { + vector.setInitialCapacity(4095); vector.allocateNew(); int capacity = vector.getValueCapacity(); - assertEquals(4095, capacity); + assertTrue(capacity >= 
4095); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < capacity; i++) { if (i % 3 == 0) { continue; } @@ -1445,12 +1475,11 @@ public void testCopyFromWithNulls1() { } /* NO reAlloc() should have happened in setSafe() */ - capacity = vector.getValueCapacity(); - assertEquals(4095, capacity); + assertEquals(capacity, vector.getValueCapacity()); - vector.setValueCount(4095); + vector.setValueCount(capacity); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < capacity; i++) { if (i % 3 == 0) { assertNull(vector.getObject(i)); } else { @@ -1463,10 +1492,11 @@ public void testCopyFromWithNulls1() { */ vector2.allocateNew(1024 * 10, 1024); - capacity = vector2.getValueCapacity(); - assertEquals(1024, capacity); + int capacity2 = vector2.getValueCapacity(); + assertTrue(capacity2 >= 1024); + assertTrue(capacity2 <= capacity); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < capacity; i++) { vector2.copyFromSafe(i, i, vector); if (i % 3 == 0) { assertNull(vector2.getObject(i)); @@ -1476,12 +1506,11 @@ public void testCopyFromWithNulls1() { } /* 2 reAllocs should have happened in copyFromSafe() */ - capacity = vector2.getValueCapacity(); - assertEquals(4096, capacity); + assertEquals(capacity, vector2.getValueCapacity()); - vector2.setValueCount(4095); + vector2.setValueCount(capacity); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < capacity; i++) { if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { @@ -1876,30 +1905,88 @@ public void testSetInitialCapacity() { try (final VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) { /* use the default 8 data bytes on average per element */ - vector.setInitialCapacity(4096); + int defaultCapacity = BaseValueVector.INITIAL_VALUE_ALLOCATION - 1; + vector.setInitialCapacity(defaultCapacity); vector.allocateNew(); - assertEquals(4096, vector.getValueCapacity()); - assertEquals(4096 * 8, vector.getDataBuffer().capacity()); + assertEquals(defaultCapacity, vector.getValueCapacity()); + assertEquals(BaseAllocator.nextPowerOfTwo(defaultCapacity * 8), vector.getDataBuffer().capacity()); - vector.setInitialCapacity(4096, 1); + vector.setInitialCapacity(defaultCapacity, 1); vector.allocateNew(); - assertEquals(4096, vector.getValueCapacity()); - assertEquals(4096, vector.getDataBuffer().capacity()); + assertEquals(defaultCapacity, vector.getValueCapacity()); + assertEquals(BaseAllocator.nextPowerOfTwo(defaultCapacity), vector.getDataBuffer().capacity()); - vector.setInitialCapacity(4096, 0.1); + vector.setInitialCapacity(defaultCapacity, 0.1); vector.allocateNew(); - assertEquals(4096, vector.getValueCapacity()); - assertEquals(512, vector.getDataBuffer().capacity()); + assertEquals(defaultCapacity, vector.getValueCapacity()); + assertEquals(BaseAllocator.nextPowerOfTwo((int)(defaultCapacity * 0.1)), vector.getDataBuffer().capacity()); - vector.setInitialCapacity(4096, 0.01); + vector.setInitialCapacity(defaultCapacity, 0.01); vector.allocateNew(); - assertEquals(4096, vector.getValueCapacity()); - assertEquals(64, vector.getDataBuffer().capacity()); + assertEquals(defaultCapacity, vector.getValueCapacity()); + assertEquals(BaseAllocator.nextPowerOfTwo((int)(defaultCapacity * 0.01)), vector.getDataBuffer().capacity()); vector.setInitialCapacity(5, 0.01); vector.allocateNew(); - assertEquals(7, vector.getValueCapacity()); + assertEquals(5, vector.getValueCapacity()); assertEquals(2, vector.getDataBuffer().capacity()); } } + + @Test + public void testDefaultAllocNewAll() { + int defaultCapacity = 
BaseFixedWidthVector.INITIAL_VALUE_ALLOCATION; + int expectedSize; + long beforeSize; + try (BufferAllocator childAllocator = allocator.newChildAllocator("defaultAllocs", 0, Long.MAX_VALUE); + final IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, childAllocator); + final BigIntVector bigIntVector = new BigIntVector(EMPTY_SCHEMA_PATH, childAllocator); + final BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, childAllocator); + final DecimalVector decimalVector = new DecimalVector(EMPTY_SCHEMA_PATH, childAllocator, 38, 6); + final VarCharVector varCharVector = new VarCharVector(EMPTY_SCHEMA_PATH, childAllocator)) { + + // verify that the wastage is within bounds for IntVector. + beforeSize = childAllocator.getAllocatedMemory(); + intVector.allocateNew(); + assertTrue(intVector.getValueCapacity() >= defaultCapacity); + expectedSize = (defaultCapacity * IntVector.TYPE_WIDTH) + + BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity); + assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05); + + // verify that the wastage is within bounds for BigIntVector. + beforeSize = childAllocator.getAllocatedMemory(); + bigIntVector.allocateNew(); + assertTrue(bigIntVector.getValueCapacity() >= defaultCapacity); + expectedSize = (defaultCapacity * bigIntVector.TYPE_WIDTH) + + BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity); + assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05); + + // verify that the wastage is within bounds for DecimalVector. + beforeSize = childAllocator.getAllocatedMemory(); + decimalVector.allocateNew(); + assertTrue(decimalVector.getValueCapacity() >= defaultCapacity); + expectedSize = (defaultCapacity * decimalVector.TYPE_WIDTH) + + BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity); + assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05); + + // verify that the wastage is within bounds for VarCharVector. + // var char vector have an offsets array that is 1 less than defaultCapacity + beforeSize = childAllocator.getAllocatedMemory(); + varCharVector.allocateNew(); + assertTrue(varCharVector.getValueCapacity() >= defaultCapacity - 1); + expectedSize = (defaultCapacity * VarCharVector.OFFSET_WIDTH) + + BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity) + + defaultCapacity * 8; + // wastage should be less than 5%. + assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05); + + // verify that the wastage is within bounds for BitVector. 
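+      // (both the validity buffer and the data buffer of a BitVector hold one bit
+      // per value, which is why the expected size is twice the validity-buffer size)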
+ beforeSize = childAllocator.getAllocatedMemory(); + bitVector.allocateNew(); + assertTrue(bitVector.getValueCapacity() >= defaultCapacity); + expectedSize = BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity) * 2; + assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05); + + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java index 5474675fbf343..60747aaad92ce 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java @@ -19,6 +19,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; import java.nio.charset.StandardCharsets; @@ -54,20 +55,21 @@ public void testFixedType() { vector.setInitialCapacity(512); vector.allocateNew(); - assertEquals(512, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= 512); + int initialCapacity = vector.getValueCapacity(); try { - vector.set(512, 0); + vector.set(initialCapacity, 0); Assert.fail("Expected out of bounds exception"); } catch (Exception e) { // ok } vector.reAlloc(); - assertEquals(1024, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= 2 * initialCapacity); - vector.set(512, 100); - assertEquals(100, vector.get(512)); + vector.set(initialCapacity, 100); + assertEquals(100, vector.get(initialCapacity)); } } @@ -77,20 +79,21 @@ public void testNullableType() { vector.setInitialCapacity(512); vector.allocateNew(); - assertEquals(512, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= 512); + int initialCapacity = vector.getValueCapacity(); try { - vector.set(512, "foo".getBytes(StandardCharsets.UTF_8)); + vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8)); Assert.fail("Expected out of bounds exception"); } catch (Exception e) { // ok } vector.reAlloc(); - assertEquals(1024, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= 2 * initialCapacity); - vector.set(512, "foo".getBytes(StandardCharsets.UTF_8)); - assertEquals("foo", new String(vector.get(512), StandardCharsets.UTF_8)); + vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8)); + assertEquals("foo", new String(vector.get(initialCapacity), StandardCharsets.UTF_8)); } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java index b7215ce4e2e68..61c1b924f664d 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java @@ -974,11 +974,16 @@ public void testSingleStructWriter1() { Float4Vector float4Vector = (Float4Vector)parent.getChild("float4Field"); Float8Vector float8Vector = (Float8Vector)parent.getChild("float8Field"); - assertEquals(initialCapacity, singleStructWriter.getValueCapacity()); - assertEquals(initialCapacity, intVector.getValueCapacity()); - assertEquals(initialCapacity, bigIntVector.getValueCapacity()); - assertEquals(initialCapacity, float4Vector.getValueCapacity()); - assertEquals(initialCapacity, float8Vector.getValueCapacity()); + int capacity = singleStructWriter.getValueCapacity(); + assertTrue(capacity >= initialCapacity && capacity < initialCapacity * 
2); + capacity = intVector.getValueCapacity(); + assertTrue(capacity >= initialCapacity && capacity < initialCapacity * 2); + capacity = bigIntVector.getValueCapacity(); + assertTrue(capacity >= initialCapacity && capacity < initialCapacity * 2); + capacity = float4Vector.getValueCapacity(); + assertTrue(capacity >= initialCapacity && capacity < initialCapacity * 2); + capacity = float8Vector.getValueCapacity(); + assertTrue(capacity >= initialCapacity && capacity < initialCapacity * 2); StructReader singleStructReader = new SingleStructReaderImpl(parent); diff --git a/java/vector/src/test/resources/logback.xml b/java/vector/src/test/resources/logback.xml new file mode 100644 index 0000000000000..f9e449fa67b2e --- /dev/null +++ b/java/vector/src/test/resources/logback.xml @@ -0,0 +1,28 @@ + + + + + + + + %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + + diff --git a/js/.gitignore b/js/.gitignore index 3437e39da6c0a..5e412f8ee8a57 100644 --- a/js/.gitignore +++ b/js/.gitignore @@ -23,7 +23,8 @@ npm-debug.log* yarn-debug.log* yarn-error.log* -.vscode +.vscode/** +!.vscode/launch.json # Runtime data pids @@ -78,10 +79,13 @@ yarn.lock .env # compilation targets +doc dist targets # test data files -test/data/ +test/data/**/*.json +test/data/**/*.arrow + # jest snapshots (too big) test/__snapshots__/ diff --git a/js/.npmrc b/js/.npmrc index b6b25d1f1816d..5536efc09ce5c 100644 --- a/js/.npmrc +++ b/js/.npmrc @@ -1,2 +1,2 @@ save-prefix= -package-lock=false \ No newline at end of file +engine-strict=true diff --git a/js/.vscode/launch.json b/js/.vscode/launch.json new file mode 100644 index 0000000000000..ba5609e0c10e8 --- /dev/null +++ b/js/.vscode/launch.json @@ -0,0 +1,169 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. 
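+  // (assumption: running these configurations requires the repo's devDependencies,
+  // e.g. gulp, jest, and ts-node, to be installed first)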
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "node", + "request": "launch", + "name": "Debug Gulp Build", + "program": "${workspaceFolder}/node_modules/gulp/bin/gulp.js", + "args": [ + "build", + // Specify we want to debug the "src" target, which won't clean or build -- essentially a "dry-run" of the gulp build + "--target", "src" + ] + }, + { + "type": "node", + "request": "launch", + "name": "Debug Unit Tests", + "cwd": "${workspaceRoot}", + "program": "${workspaceFolder}/node_modules/.bin/jest", + "skipFiles": [ + "/**/*.js", + "${workspaceFolder}/node_modules/**/*.js" + ], + "env": { + "NODE_NO_WARNINGS": "1", + "READABLE_STREAM": "disable", + "TEST_DOM_STREAMS": "true", + "TEST_NODE_STREAMS": "true", + // Modify these environment variables to run tests on a specific compilation target + module format combo + "TEST_TS_SOURCE": "true", + // "TEST_TS_SOURCE": "false", + // "TEST_TARGET": "es5", + // "TEST_MODULE": "umd" + }, + "args": [ + // "-i", + "test/unit/", + + // Uncomment any of these to run individual test suites + // "test/unit/int-tests.ts", + // "test/unit/table-tests.ts", + // "test/unit/generated-data-tests.ts", + + // "test/unit/vector/vector-tests.ts", + // "test/unit/vector/bool-vector-tests.ts", + // "test/unit/vector/date-vector-tests.ts", + // "test/unit/vector/float16-vector-tests.ts", + // "test/unit/vector/numeric-vector-tests.ts", + + // "test/unit/visitor-tests.ts", + + // "test/unit/ipc/message-reader-tests.ts", + // "test/unit/ipc/reader/file-reader-tests.ts", + // "test/unit/ipc/reader/json-reader-tests.ts", + // "test/unit/ipc/reader/from-inference-tests.ts", + // "test/unit/ipc/reader/stream-reader-tests.ts", + // "test/unit/ipc/reader/streams-dom-tests.ts", + // "test/unit/ipc/reader/streams-node-tests.ts", + // "test/unit/ipc/writer/file-writer-tests.ts", + // "test/unit/ipc/writer/json-writer-tests.ts", + // "test/unit/ipc/writer/stream-writer-tests.ts", + // "test/unit/ipc/writer/streams-dom-tests.ts", + // "test/unit/ipc/writer/streams-node-tests.ts", + ] + }, + { + "type": "node", + "request": "launch", + "name": "Debug Integration Tests", + "cwd": "${workspaceRoot}", + "program": "${workspaceFolder}/bin/integration.js", + "skipFiles": [ + "/**/*.js", + "${workspaceFolder}/node_modules/**/*.js" + ], + "env": { + "NODE_NO_WARNINGS": "1", + "READABLE_STREAM": "disable" + }, + "args": [ + "--mode", "VALIDATE" + ] + }, + { + "type": "node", + "request": "launch", + "name": "Debug bin/arrow2csv", + "env": { "ARROW_JS_DEBUG": "src", "TS_NODE_CACHE": "false" }, + "runtimeArgs": ["-r", "ts-node/register"], + "console": "integratedTerminal", + "skipFiles": [ + "/**/*.js", + "${workspaceFolder}/node_modules/**/*.js" + ], + "args": [ + "${workspaceFolder}/src/bin/arrow2csv.ts", + "-f", "./test/data/cpp/stream/simple.arrow" + ] + }, + { + "type": "node", + "request": "launch", + "name": "Debug bin/file-to-stream", + "env": { "ARROW_JS_DEBUG": "src", "TS_NODE_CACHE": "false" }, + "runtimeArgs": ["-r", "ts-node/register"], + "skipFiles": [ + "/**/*.js", + "${workspaceFolder}/node_modules/**/*.js" + ], + "args": [ + "${workspaceFolder}/bin/file-to-stream.js", + "./test/data/cpp/file/struct_example.arrow", + "./struct_example-stream-out.arrow", + ] + }, + { + "type": "node", + "request": "launch", + "name": "Debug bin/stream-to-file", + "env": { "ARROW_JS_DEBUG": "src", "TS_NODE_CACHE": "false" }, + "runtimeArgs": ["-r", "ts-node/register"], + "skipFiles": [ + "/**/*.js", + 
"${workspaceFolder}/node_modules/**/*.js" + ], + "args": [ + "${workspaceFolder}/bin/stream-to-file.js", + "./test/data/cpp/stream/struct_example.arrow", + "./struct_example-file-out.arrow", + ] + }, + { + "type": "node", + "request": "launch", + "name": "Debug bin/json-to-arrow", + "env": { "ARROW_JS_DEBUG": "src", "TS_NODE_CACHE": "false" }, + "runtimeArgs": ["-r", "ts-node/register"], + "skipFiles": [ + "/**/*.js", + "${workspaceFolder}/node_modules/**/*.js" + ], + "args": [ + "${workspaceFolder}/bin/json-to-arrow.js", + "-j", "./test/data/json/struct_example.json", + "-a", "./struct_example-stream-out.arrow", + "-f", "stream" + ] + }, + { + "type": "node", + "request": "launch", + "name": "Debug bin/print-buffer-alignment", + "env": { "ARROW_JS_DEBUG": "src", "TS_NODE_CACHE": "false" }, + "runtimeArgs": ["-r", "ts-node/register"], + "skipFiles": [ + "/**/*.js", + "${workspaceFolder}/node_modules/**/*.js" + ], + "args": [ + "${workspaceFolder}/bin/print-buffer-alignment.js", + "./test/data/cpp/stream/struct_example.arrow" + ] + } + ] +} diff --git a/js/README.md b/js/README.md index e048ba1c1cdf1..0af4fecabccc9 100644 --- a/js/README.md +++ b/js/README.md @@ -49,7 +49,7 @@ Check out our [API documentation][7] to learn more about how to use Apache Arrow ### Get a table from an Arrow file on disk (in IPC format) -```es6 +```js import { readFileSync } from 'fs'; import { Table } from 'apache-arrow'; @@ -70,7 +70,7 @@ null, null, null ### Create a Table when the Arrow file is split across buffers -```es6 +```js import { readFileSync } from 'fs'; import { Table } from 'apache-arrow'; @@ -93,45 +93,42 @@ console.log(table.toString()); ### Create a Table from JavaScript arrays -```es6 -const fields = [{ - name: 'precipitation', - type: { name: 'floatingpoint', precision: 'SINGLE'}, - nullable: false, children: [] - }, { - name: 'date', - type: { name: 'date', unit: 'MILLISECOND' }, - nullable: false, children: [] - }]; -const rainAmounts = Array.from({length: LENGTH}, () => Number((Math.random() * 20).toFixed(1))); -const rainDates = Array.from({length: LENGTH}, (_, i) => Date.now() - 1000 * 60 * 60 * 24 * i); +```js +import { + Table, + FloatVector, + DateVector +} from 'apache-arrow'; const LENGTH = 2000; -const rainfall = arrow.Table.from({ - schema: { fields: fields }, - batches: [{ - count: LENGTH, - columns: [ - {name: "precipitation", count: LENGTH, VALIDITY: [], DATA: rainAmounts }, - {name: "date", count: LENGTH, VALIDITY: [], DATA: rainDates } ] }] }) + +const rainAmounts = Float32Array.from( + { length: LENGTH }, + () => Number((Math.random() * 20).toFixed(1))); + +const rainDates = Array.from( + { length: LENGTH }, + (_, i) => new Date(Date.now() - 1000 * 60 * 60 * 24 * i)); + +const rainfall = Table.fromVectors( + [FloatVector.from(rainAmounts), DateVector.from(rainDates)], + ['precipitation', 'date'] +); ``` ### Load data with `fetch` -```es6 +```js import { Table } from "apache-arrow"; -fetch(require("simple.arrow")).then(response => { - response.arrayBuffer().then(buffer => { - const table = Table.from(new Uint8Array(buffer)); - console.log(table.toString()); - }); -}); +const table = await Table.from(fetch(("/simple.arrow"))); +console.log(table.toString()); + ``` ### Columns look like JS Arrays -```es6 +```js import { readFileSync } from 'fs'; import { Table } from 'apache-arrow'; @@ -143,7 +140,7 @@ const table = Table.from([ const column = table.getColumn('origin_lat'); // Copy the data into a TypedArray -const typed = column.slice(); +const typed = column.toArray(); 
assert(typed instanceof Float32Array); for (let i = -1, n = column.length; ++i < n;) { @@ -153,7 +150,7 @@ for (let i = -1, n = column.length; ++i < n;) { ### Usage with MapD Core -```es6 +```js import MapD from 'rxjs-mapd'; import { Table } from 'apache-arrow'; @@ -176,7 +173,7 @@ MapD.open(host, port) ) .map(([schema, records]) => // Create Arrow Table from results - Table.from(schema, records)) + Table.from([schema, records])) .map((table) => // Stringify the table to CSV with row numbers table.toString({ index: true })) diff --git a/js/bin/arrow2csv.js b/js/bin/arrow2csv.js new file mode 100755 index 0000000000000..0e446fabe7958 --- /dev/null +++ b/js/bin/arrow2csv.js @@ -0,0 +1,28 @@ +#! /usr/bin/env node + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +const Path = require(`path`); +const here = Path.resolve(__dirname, '../'); +const tsnode = require.resolve(`ts-node/register`); +const arrow2csv = Path.join(here, `src/bin/arrow2csv.ts`); +const env = { ...process.env, TS_NODE_TRANSPILE_ONLY: `true` }; + +require('child_process').spawn(`node`, [ + `-r`, tsnode, arrow2csv, ...process.argv.slice(2) +], { cwd: here, env, stdio: `inherit` }); diff --git a/js/bin/file-to-stream.js b/js/bin/file-to-stream.js index fa4e5d17bbd3a..090cd0b0eda77 100755 --- a/js/bin/file-to-stream.js +++ b/js/bin/file-to-stream.js @@ -17,21 +17,24 @@ // specific language governing permissions and limitations // under the License. +// @ts-check + const fs = require('fs'); const path = require('path'); - -const encoding = 'binary'; -const ext = process.env.ARROW_JS_DEBUG === 'src' ? '.ts' : ''; -const { util: { PipeIterator } } = require(`../index${ext}`); -const { Table, serializeStream, fromReadableStream } = require(`../index${ext}`); +const eos = require('util').promisify(require('stream').finished); +const extension = process.env.ARROW_JS_DEBUG === 'src' ? '.ts' : ''; +const { RecordBatchReader, RecordBatchStreamWriter } = require(`../index${extension}`); (async () => { - // Todo (ptaylor): implement `serializeStreamAsync` that accepts an - // AsyncIterable, rather than aggregating into a Table first - const in_ = process.argv.length < 3 - ? process.stdin : fs.createReadStream(path.resolve(process.argv[2])); - const out = process.argv.length < 4 - ? process.stdout : fs.createWriteStream(path.resolve(process.argv[3])); - new PipeIterator(serializeStream(await Table.fromAsync(fromReadableStream(in_))), encoding).pipe(out); + + const readable = process.argv.length < 3 ? process.stdin : fs.createReadStream(path.resolve(process.argv[2])); + const writable = process.argv.length < 4 ? 
process.stdout : fs.createWriteStream(path.resolve(process.argv[3])); + + const fileToStream = readable + .pipe(RecordBatchReader.throughNode()) + .pipe(RecordBatchStreamWriter.throughNode()) + .pipe(writable); + + await eos(fileToStream); })().catch((e) => { console.error(e); process.exit(1); }); diff --git a/js/bin/integration.js b/js/bin/integration.js index 6c064deac258d..c6f6cd7a24ed5 100755 --- a/js/bin/integration.js +++ b/js/bin/integration.js @@ -17,61 +17,55 @@ // specific language governing permissions and limitations // under the License. +// @ts-nocheck + const fs = require('fs'); -const glob = require('glob'); -const path = require('path'); -const child_process = require(`child_process`); +const Path = require('path'); +const { promisify } = require('util'); +const glob = promisify(require('glob')); +const { zip } = require('ix/iterable/zip'); +const { parse: bignumJSONParse } = require('json-bignum'); const argv = require(`command-line-args`)(cliOpts(), { partial: true }); -const gulpPath = require.resolve(path.join(`..`, `node_modules/gulp/bin/gulp.js`)); - -let jsonPaths = [...(argv.json || [])]; -let arrowPaths = [...(argv.arrow || [])]; +const { + Table, + RecordBatchReader, + util: { createElementComparator } +} = require('../targets/apache-arrow/Arrow.es5.min'); -if (!argv.mode) { - return print_usage(); +const exists = async (p) => { + try { + return !!(await fs.promises.stat(p)); + } catch (e) { return false; } } -let mode = argv.mode.toUpperCase(); -if (mode === 'VALIDATE' && !jsonPaths.length) { - jsonPaths = glob.sync(path.resolve(__dirname, `../test/data/json/`, `*.json`)); - if (!arrowPaths.length) { - [jsonPaths, arrowPaths] = jsonPaths.reduce(([jsonPaths, arrowPaths], jsonPath) => { - const { name } = path.parse(jsonPath); - for (const source of ['cpp', 'java']) { - for (const format of ['file', 'stream']) { - const arrowPath = path.resolve(__dirname, `../test/data/${source}/${format}/${name}.arrow`); - if (fs.existsSync(arrowPath)) { - jsonPaths.push(jsonPath); - arrowPaths.push(arrowPath); - } - } - } - return [jsonPaths, arrowPaths]; - }, [[], []]); - console.log(`jsonPaths: [\n\t${jsonPaths.join('\n\t')}\n]`); - console.log(`arrowPaths: [\n\t${arrowPaths.join('\n\t')}\n]`); +(async () => { + + if (!argv.mode) { return print_usage(); } + + let mode = argv.mode.toUpperCase(); + let jsonPaths = [...(argv.json || [])]; + let arrowPaths = [...(argv.arrow || [])]; + + if (mode === 'VALIDATE' && !jsonPaths.length) { + [jsonPaths, arrowPaths] = await loadLocalJSONAndArrowPathsForDebugging(jsonPaths, arrowPaths); } -} else if (!jsonPaths.length) { - return print_usage(); -} -switch (mode) { - case 'VALIDATE': - const args = [`test`, `-i`].concat(argv._unknown || []); - jsonPaths.forEach((p, i) => { - args.push('-j', p, '-a', arrowPaths[i]); - }); - process.exitCode = child_process.spawnSync( - gulpPath, args, - { - cwd: path.resolve(__dirname, '..'), - stdio: ['ignore', 'inherit', 'inherit'] + if (!jsonPaths.length) { return print_usage(); } + + switch (mode) { + case 'VALIDATE': + for (let [jsonPath, arrowPath] of zip(jsonPaths, arrowPaths)) { + await validate(jsonPath, arrowPath); } - ).status || process.exitCode || 0; - break; - default: - print_usage(); -} + break; + default: + return print_usage(); + } +})() +.then((x) => +x || 0, (e) => { + e && process.stderr.write(`${e && e.stack || e}\n`); + return process.exitCode || 1; +}).then((code) => process.exit(code)); function cliOpts() { return [ @@ -118,5 +112,144 @@ function print_usage() { ] }, ])); - 
process.exit(1); + return 1; +} + +async function validate(jsonPath, arrowPath) { + + const files = await Promise.all([ + fs.promises.readFile(arrowPath), + fs.promises.readFile(jsonPath, 'utf8'), + ]); + + const arrowData = files[0]; + const jsonData = bignumJSONParse(files[1]); + + validateReaderIntegration(jsonData, arrowData); + validateTableFromBuffersIntegration(jsonData, arrowData); + validateTableToBuffersIntegration('json', 'file')(jsonData, arrowData); + validateTableToBuffersIntegration('json', 'file')(jsonData, arrowData); + validateTableToBuffersIntegration('binary', 'file')(jsonData, arrowData); + validateTableToBuffersIntegration('binary', 'file')(jsonData, arrowData); +} + +function validateReaderIntegration(jsonData, arrowBuffer) { + const msg = `json and arrow record batches report the same values`; + try { + const jsonReader = RecordBatchReader.from(jsonData); + const binaryReader = RecordBatchReader.from(arrowBuffer); + for (const [jsonRecordBatch, binaryRecordBatch] of zip(jsonReader, binaryReader)) { + compareTableIsh(jsonRecordBatch, binaryRecordBatch); + } + } catch (e) { throw new Error(`${msg}: fail \n ${e && e.stack || e}`); } + process.stdout.write(`${msg}: pass\n`); +} + +function validateTableFromBuffersIntegration(jsonData, arrowBuffer) { + const msg = `json and arrow tables report the same values`; + try { + const jsonTable = Table.from(jsonData); + const binaryTable = Table.from(arrowBuffer); + compareTableIsh(jsonTable, binaryTable); + } catch (e) { throw new Error(`${msg}: fail \n ${e && e.stack || e}`); } + process.stdout.write(`${msg}: pass\n`); +} + +function validateTableToBuffersIntegration(srcFormat, arrowFormat) { + const refFormat = srcFormat === `json` ? `binary` : `json`; + return function testTableToBuffersIntegration(jsonData, arrowBuffer) { + const msg = `serialized ${srcFormat} ${arrowFormat} reports the same values as the ${refFormat} ${arrowFormat}`; + try { + const refTable = Table.from(refFormat === `json` ? jsonData : arrowBuffer); + const srcTable = Table.from(srcFormat === `json` ? jsonData : arrowBuffer); + const dstTable = Table.from(srcTable.serialize(`binary`, arrowFormat === `stream`)); + compareTableIsh(dstTable, refTable); + } catch (e) { throw new Error(`${msg}: fail \n ${e && e.stack || e}`); } + process.stdout.write(`${msg}: pass\n`); + }; +} + +function compareTableIsh(actual, expected) { + if (actual.length !== expected.length) { + throw new Error(`length: ${actual.length} !== ${expected.length}`); + } + if (actual.numCols !== expected.numCols) { + throw new Error(`numCols: ${actual.numCols} !== ${expected.numCols}`); + } + (() => { + const getChildAtFn = expected instanceof Table ? 'getColumnAt' : 'getChildAt'; + for (let i = -1, n = actual.numCols; ++i < n;) { + const v1 = actual[getChildAtFn](i); + const v2 = expected[getChildAtFn](i); + compareVectors(v1, v2); + } + })(); +} + +function compareVectors(actual, expected) { + + if ((actual == null && expected != null) || (expected == null && actual != null)) { + throw new Error(`${actual == null ? `actual` : `expected`} is null, was expecting ${actual == null ? 
expected : actual} to be that also`); + } + + let props = ['type', 'length', 'nullCount']; + + (() => { + for (let i = -1, n = props.length; ++i < n;) { + const prop = props[i]; + if (`${actual[prop]}` !== `${expected[prop]}`) { + throw new Error(`${prop}: ${actual[prop]} !== ${expected[prop]}`); + } + } + })(); + + (() => { + for (let i = -1, n = actual.length; ++i < n;) { + let x1 = actual.get(i), x2 = expected.get(i); + if (!createElementComparator(x2)(x1)) { + throw new Error(`${i}: ${x1} !== ${x2}`); + } + } + })(); + + (() => { + let i = -1; + for (let [x1, x2] of zip(actual, expected)) { + ++i; + if (!createElementComparator(x2)(x1)) { + throw new Error(`${i}: ${x1} !== ${x2}`); + } + } + })(); +} + +async function loadLocalJSONAndArrowPathsForDebugging(jsonPaths, arrowPaths) { + + const sourceJSONPaths = await glob(Path.resolve(__dirname, `../test/data/json/`, `*.json`)); + + if (!arrowPaths.length) { + await loadJSONAndArrowPaths(sourceJSONPaths, jsonPaths, arrowPaths, 'cpp', 'file'); + await loadJSONAndArrowPaths(sourceJSONPaths, jsonPaths, arrowPaths, 'java', 'file'); + await loadJSONAndArrowPaths(sourceJSONPaths, jsonPaths, arrowPaths, 'cpp', 'stream'); + await loadJSONAndArrowPaths(sourceJSONPaths, jsonPaths, arrowPaths, 'java', 'stream'); + } + + for (let [jsonPath, arrowPath] of zip(jsonPaths, arrowPaths)) { + console.log(`jsonPath: ${jsonPath}`); + console.log(`arrowPath: ${arrowPath}`); + } + + return [jsonPaths, arrowPaths]; + + async function loadJSONAndArrowPaths(sourceJSONPaths, jsonPaths, arrowPaths, source, format) { + for (const jsonPath of sourceJSONPaths) { + const { name } = Path.parse(jsonPath); + const arrowPath = Path.resolve(__dirname, `../test/data/${source}/${format}/${name}.arrow`); + if (await exists(arrowPath)) { + jsonPaths.push(jsonPath); + arrowPaths.push(arrowPath); + } + } + return [jsonPaths, arrowPaths]; + } } diff --git a/js/bin/json-to-arrow.js b/js/bin/json-to-arrow.js index f28b4145ffaed..7a98d56d1a5e2 100755 --- a/js/bin/json-to-arrow.js +++ b/js/bin/json-to-arrow.js @@ -17,37 +17,46 @@ // specific language governing permissions and limitations // under the License. +// @ts-check + const fs = require('fs'); -const glob = require('glob'); -const path = require('path'); -const { promisify } = require('util'); +const Path = require('path'); const { parse } = require('json-bignum'); +const eos = require('util').promisify(require('stream').finished); +const extension = process.env.ARROW_JS_DEBUG === 'src' ? '.ts' : ''; const argv = require(`command-line-args`)(cliOpts(), { partial: true }); +const { RecordBatchReader, RecordBatchFileWriter, RecordBatchStreamWriter } = require(`../index${extension}`); -const ext = process.env.ARROW_JS_DEBUG === 'src' ? 
'.ts' : ''; -const { Table } = require(`../index${ext}`); - -const encoding = 'binary'; -const stream = argv.format === 'stream'; const jsonPaths = [...(argv.json || [])]; const arrowPaths = [...(argv.arrow || [])]; -if (!jsonPaths.length || !arrowPaths.length || (jsonPaths.length !== arrowPaths.length)) { - return print_usage(); -} +(async () => { -const readFile = callResolved(promisify(fs.readFile)); -const writeFile = callResolved(promisify(fs.writeFile)); + if (!jsonPaths.length || !arrowPaths.length || (jsonPaths.length !== arrowPaths.length)) { + return print_usage(); + } -(async () => await Promise.all(jsonPaths.map(async (jPath, i) => { - const aPath = arrowPaths[i]; - const arrowTable = Table.from(parse('' + (await readFile(jPath)))); - await writeFile(aPath, arrowTable.serialize(encoding, stream), encoding); -})))().catch((e) => { console.error(e); process.exit(1); }); + await Promise.all(jsonPaths.map(async (path, i) => { + + const RecordBatchWriter = argv.format !== 'stream' + ? RecordBatchFileWriter + : RecordBatchStreamWriter; -function callResolved(fn) { - return async (path_, ...xs) => await fn(path.resolve(path_), ...xs); -} + const reader = RecordBatchReader.from(parse( + await fs.promises.readFile(Path.resolve(path), 'utf8'))); + + const jsonToArrow = reader + .pipe(RecordBatchWriter.throughNode()) + .pipe(fs.createWriteStream(arrowPaths[i])); + + await eos(jsonToArrow); + + })); +})() +.then((x) => +x || 0, (e) => { + e && process.stderr.write(`${e}`); + return process.exitCode || 1; +}).then((code = 0) => process.exit(code)); function cliOpts() { return [ @@ -95,5 +104,5 @@ function print_usage() { ] }, ])); - process.exit(1); + return 1; } diff --git a/js/bin/print-buffer-alignment.js b/js/bin/print-buffer-alignment.js index a4cd9bb2351e7..8d422aad60d74 100755 --- a/js/bin/print-buffer-alignment.js +++ b/js/bin/print-buffer-alignment.js @@ -17,34 +17,41 @@ // specific language governing permissions and limitations // under the License. +// @ts-check + const fs = require('fs'); const path = require('path'); - -const ext = process.env.ARROW_JS_DEBUG === 'src' ? '.ts' : ''; -const base = process.env.ARROW_JS_DEBUG === 'src' ? '../src' : '../targets/apache-arrow'; -const { Message } = require(`${base}/ipc/metadata${ext}`); -const { readBuffersAsync } = require(`${base}/ipc/reader/binary${ext}`); -const { Table, VectorVisitor, fromReadableStream } = require(`../index${ext}`); +const extension = process.env.ARROW_JS_DEBUG === 'src' ? '.ts' : ''; +const { AsyncMessageReader } = require(`../index${extension}`); (async () => { - const in_ = process.argv.length < 3 - ? process.stdin : fs.createReadStream(path.resolve(process.argv[2])); - - let recordBatchIndex = 0; - let dictionaryBatchIndex = 0; - - for await (let { message, loader } of readBuffersAsync(fromReadableStream(in_))) { - - if (Message.isRecordBatch(message)) { - console.log(`record batch ${++recordBatchIndex}, offset ${loader.messageOffset}`); - } else if (Message.isDictionaryBatch(message)) { - message = message.data; - console.log(`dictionary batch ${++dictionaryBatchIndex}, offset ${loader.messageOffset}`); - } else { continue; } - - message.buffers.forEach(({offset, length}, i) => { - console.log(`\tbuffer ${i+1}: { offset: ${offset}, length: ${length} }`); + + const readable = process.argv.length < 3 ? 
process.stdin : fs.createReadStream(path.resolve(process.argv[2])); + const reader = new AsyncMessageReader(readable); + + let recordBatchIndex = 0, dictionaryBatchIndex = 0; + + for await (let message of reader) { + + let bufferRegions = []; + + if (message.isSchema()) { + continue; + } else if (message.isRecordBatch()) { + bufferRegions = message.header().buffers; + const body = await reader.readMessageBody(message.bodyLength); + console.log(`record batch ${++recordBatchIndex}, byteOffset ${body.byteOffset}`); + } else if (message.isDictionaryBatch()) { + bufferRegions = message.header().data.buffers; + const body = await reader.readMessageBody(message.bodyLength); + console.log(`dictionary batch ${++dictionaryBatchIndex}, byteOffset ${body.byteOffset}`); + } + + bufferRegions.forEach(({ offset, length }, i) => { + console.log(`\tbuffer ${i + 1}: { offset: ${offset}, length: ${length} }`); }); } + await reader.return(); + })().catch((e) => { console.error(e); process.exit(1); }); diff --git a/js/bin/stream-to-file.js b/js/bin/stream-to-file.js index f33646ac61a41..015a5eace74d8 100755 --- a/js/bin/stream-to-file.js +++ b/js/bin/stream-to-file.js @@ -17,21 +17,24 @@ // specific language governing permissions and limitations // under the License. +// @ts-check + const fs = require('fs'); const path = require('path'); - -const encoding = 'binary'; -const ext = process.env.ARROW_JS_DEBUG === 'src' ? '.ts' : ''; -const { util: { PipeIterator } } = require(`../index${ext}`); -const { Table, serializeFile, fromReadableStream } = require(`../index${ext}`); +const eos = require('util').promisify(require('stream').finished); +const extension = process.env.ARROW_JS_DEBUG === 'src' ? '.ts' : ''; +const { RecordBatchReader, RecordBatchFileWriter } = require(`../index${extension}`); (async () => { - // Todo (ptaylor): implement `serializeFileAsync` that accepts an - // AsyncIterable, rather than aggregating into a Table first - const in_ = process.argv.length < 3 - ? process.stdin : fs.createReadStream(path.resolve(process.argv[2])); - const out = process.argv.length < 4 - ? process.stdout : fs.createWriteStream(path.resolve(process.argv[3])); - new PipeIterator(serializeFile(await Table.fromAsync(fromReadableStream(in_))), encoding).pipe(out); + + const readable = process.argv.length < 3 ? process.stdin : fs.createReadStream(path.resolve(process.argv[2])); + const writable = process.argv.length < 4 ? process.stdout : fs.createWriteStream(path.resolve(process.argv[3])); + + const streamToFile = readable + .pipe(RecordBatchReader.throughNode()) + .pipe(RecordBatchFileWriter.throughNode()) + .pipe(writable); + + await eos(streamToFile); })().catch((e) => { console.error(e); process.exit(1); }); diff --git a/js/examples/read_file.html b/js/examples/read_file.html index 3e082d9dc412f..ec96d0e4755e2 100644 --- a/js/examples/read_file.html +++ b/js/examples/read_file.html @@ -86,6 +86,6 @@
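The bin-script rewrites above all converge on one pattern: parse the input with a RecordBatchReader, re-serialize through a RecordBatchWriter transform, and await completion with a promisified stream.finished. A minimal standalone sketch of that pattern, assuming only the RecordBatchReader/RecordBatchFileWriter exports and the throughNode() transforms this diff introduces (requiring the published apache-arrow package here just to keep the sketch self-contained):

// Sketch of the piping used by stream-to-file.js and json-to-arrow.js above.
const fs = require('fs');
const { promisify } = require('util');
const eos = promisify(require('stream').finished);
const { RecordBatchReader, RecordBatchFileWriter } = require('apache-arrow');

async function streamToFile(inPath, outPath) {
    const pipeline = fs.createReadStream(inPath)
        .pipe(RecordBatchReader.throughNode())     // IPC stream -> record batch messages
        .pipe(RecordBatchFileWriter.throughNode()) // record batches -> Arrow file format
        .pipe(fs.createWriteStream(outPath));
    await eos(pipeline); // resolves once the last stream in the chain finishes
}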
- + diff --git a/js/gulp/argv.js b/js/gulp/argv.js index 7dceb0f74c587..3a028f813f936 100644 --- a/js/gulp/argv.js +++ b/js/gulp/argv.js @@ -21,16 +21,12 @@ const path = require('path'); const argv = require(`command-line-args`)([ { name: `all`, type: Boolean }, - { name: 'update', alias: 'u', type: Boolean }, - { name: 'verbose', alias: 'v', type: Boolean }, + { name: 'verbose', alias: `v`, type: Boolean }, { name: `target`, type: String, defaultValue: `` }, { name: `module`, type: String, defaultValue: `` }, { name: `coverage`, type: Boolean, defaultValue: false }, - { name: `integration`, alias: `i`, type: Boolean, defaultValue: false }, { name: `targets`, alias: `t`, type: String, multiple: true, defaultValue: [] }, { name: `modules`, alias: `m`, type: String, multiple: true, defaultValue: [] }, - { name: `json_files`, alias: `j`, type: String, multiple: true, defaultValue: [] }, - { name: `arrow_files`, alias: `a`, type: String, multiple: true, defaultValue: [] }, ], { partial: true }); const { targets, modules } = argv; @@ -44,25 +40,4 @@ if (argv.target === `src`) { (argv.all || !modules.length) && modules.push(`all`); } -if (argv.coverage && (!argv.json_files || !argv.json_files.length)) { - - let [jsonPaths, arrowPaths] = glob - .sync(path.resolve(__dirname, `../test/data/json/`, `*.json`)) - .reduce((paths, jsonPath) => { - const { name } = path.parse(jsonPath); - const [jsonPaths, arrowPaths] = paths; - ['cpp', 'java'].forEach((source) => ['file', 'stream'].forEach((format) => { - const arrowPath = path.resolve(__dirname, `../test/data/${source}/${format}/${name}.arrow`); - if (fs.existsSync(arrowPath)) { - jsonPaths.push(jsonPath); - arrowPaths.push(arrowPath); - } - })); - return paths; - }, [[], []]); - - argv.json_files = jsonPaths; - argv.arrow_files = arrowPaths; -} - module.exports = { argv, targets, modules }; diff --git a/js/gulp/arrow-task.js b/js/gulp/arrow-task.js index 95fc1eed0f84e..e119c540dc351 100644 --- a/js/gulp/arrow-task.js +++ b/js/gulp/arrow-task.js @@ -16,24 +16,22 @@ // under the License. 
const { - mainExport, gCCLanguageNames, targetDir, observableFromStreams } = require('./util'); const del = require('del'); const gulp = require('gulp'); -const path = require('path'); const { promisify } = require('util'); const gulpRename = require(`gulp-rename`); const { memoizeTask } = require('./memoize-task'); const exec = promisify(require('child_process').exec); const { Observable, ReplaySubject } = require('rxjs'); -const arrowTask = ((cache) => memoizeTask(cache, function copyMain(target, format) { +const arrowTask = ((cache) => memoizeTask(cache, function copyMain(target) { const out = targetDir(target); const dtsGlob = `${targetDir(`es2015`, `cjs`)}/**/*.ts`; const cjsGlob = `${targetDir(`es2015`, `cjs`)}/**/*.js`; - const esmGlob = `${targetDir(`es2015`, `esm`)}/**/*.js`; + const esmGlob = `${targetDir(`esnext`, `esm`)}/**/*.js`; const es5UmdGlob = `${targetDir(`es5`, `umd`)}/*.js`; const es5UmdMaps = `${targetDir(`es5`, `umd`)}/*.map`; const es2015UmdGlob = `${targetDir(`es2015`, `umd`)}/*.js`; @@ -46,7 +44,7 @@ const arrowTask = ((cache) => memoizeTask(cache, function copyMain(target, forma observableFromStreams(gulp.src(esmGlob), ch_ext(`.mjs`), gulp.dest(out)), // copy es2015 esm files and rename to `.mjs` observableFromStreams(gulp.src(es5UmdGlob), append(`.es5.min`), gulp.dest(out)), // copy es5 umd files and add `.min` observableFromStreams(gulp.src(es5UmdMaps), gulp.dest(out)), // copy es5 umd sourcemap files, but don't rename - observableFromStreams(gulp.src(es2015UmdGlob), append(`.es2015.min`), gulp.dest(out)), // copy es2015 umd files and add `.es6.min` + observableFromStreams(gulp.src(es2015UmdGlob), append(`.es2015.min`), gulp.dest(out)), // copy es2015 umd files and add `.es2015.min` observableFromStreams(gulp.src(es2015UmdMaps), gulp.dest(out)), // copy es2015 umd sourcemap files, but don't rename ).publish(new ReplaySubject()).refCount(); }))({}); @@ -61,4 +59,4 @@ const arrowTSTask = ((cache) => memoizeTask(cache, async function copyTS(target, module.exports = arrowTask; module.exports.arrowTask = arrowTask; -module.exports.arrowTSTask = arrowTSTask; \ No newline at end of file +module.exports.arrowTSTask = arrowTSTask; diff --git a/js/gulp/clean-task.js b/js/gulp/clean-task.js index d6c90f4637c8b..551aeb41af739 100644 --- a/js/gulp/clean-task.js +++ b/js/gulp/clean-task.js @@ -16,16 +16,15 @@ // under the License. 
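copyMain above pushes several gulp.src -> gulp.dest streams through observableFromStreams and merges them into a single Observable, so the task completes only when every copy/rename stream has ended. One branch of that fan-out as a standalone sketch; ch_ext is called by copyMain but defined outside this hunk, so a plausible implementation is shown:

// Copy compiled ESM output and rename `.js` -> `.mjs` on the way,
// mirroring the esmGlob branch of copyMain. `ch_ext` here is an
// assumed implementation inferred from its call sites in this diff.
const gulp = require('gulp');
const gulpRename = require('gulp-rename');
const { targetDir, observableFromStreams } = require('./util');

const ch_ext = (ext) => gulpRename((file) => { file.extname = ext; });

const copyEsmBranch = (out) => observableFromStreams(
    gulp.src(`${targetDir(`esnext`, `esm`)}/**/*.js`), // the updated esmGlob
    ch_ext(`.mjs`),
    gulp.dest(out)
);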
const del = require('del'); +const { Observable } = require('rxjs'); const { targetDir } = require('./util'); -const { memoizeTask } = require('./memoize-task'); -const { Observable, ReplaySubject } = require('rxjs'); +const memoizeTask = require('./memoize-task'); const cleanTask = ((cache) => memoizeTask(cache, function clean(target, format) { - return Observable - .from(del(`${targetDir(target, format)}/**`)) - .catch((e) => Observable.empty()) - .multicast(new ReplaySubject()).refCount(); + const dir = targetDir(target, format); + return Observable.from(del(dir)) + .catch((e) => Observable.empty()); }))({}); module.exports = cleanTask; -module.exports.cleanTask = cleanTask; \ No newline at end of file +module.exports.cleanTask = cleanTask; diff --git a/js/gulp/closure-task.js b/js/gulp/closure-task.js index 547e760a7fa8a..ef629982ae39f 100644 --- a/js/gulp/closure-task.js +++ b/js/gulp/closure-task.js @@ -18,52 +18,83 @@ const { targetDir, mainExport, + esmRequire, gCCLanguageNames, - UMDSourceTargets, - observableFromStreams + publicModulePaths, + observableFromStreams, + shouldRunInChildProcess, + spawnGulpCommandInChildProcess, } = require('./util'); +const fs = require('fs'); const gulp = require('gulp'); const path = require('path'); const sourcemaps = require('gulp-sourcemaps'); const { memoizeTask } = require('./memoize-task'); const { compileBinFiles } = require('./typescript-task'); -const { Observable, ReplaySubject } = require('rxjs'); +const mkdirp = require('util').promisify(require('mkdirp')); const closureCompiler = require('google-closure-compiler').gulp(); -const closureTask = ((cache) => memoizeTask(cache, function closure(target, format) { +const closureTask = ((cache) => memoizeTask(cache, async function closure(target, format) { + + if (shouldRunInChildProcess(target, format)) { + return spawnGulpCommandInChildProcess('compile', target, format); + } + const src = targetDir(target, `cls`); + const srcAbsolute = path.resolve(src); const out = targetDir(target, format); - const entry = path.join(src, mainExport); - const externs = path.join(`src/Arrow.externs.js`); - return observableFromStreams( - gulp.src([ -/* external libs first --> */ `node_modules/tslib/package.json`, - `node_modules/tslib/tslib.es6.js`, - `node_modules/flatbuffers/package.json`, - `node_modules/flatbuffers/js/flatbuffers.mjs`, - `node_modules/text-encoding-utf-8/package.json`, - `node_modules/text-encoding-utf-8/src/encoding.js`, -/* then sources globs --> */ `${src}/**/*.js`, - ], { base: `./` }), - sourcemaps.init(), - closureCompiler(createClosureArgs(entry, externs)), - // rename the sourcemaps from *.js.map files to *.min.js.map - sourcemaps.write(`.`, { mapFile: (mapPath) => mapPath.replace(`.js.map`, `.${target}.min.js.map`) }), - gulp.dest(out) - ) - .merge(compileBinFiles(target, format)) - .takeLast(1) - .publish(new ReplaySubject()).refCount(); + const externs = path.join(`${out}/${mainExport}.externs.js`); + const entry_point = path.join(`${src}/${mainExport}.dom.cls.js`); + + const exportedImports = publicModulePaths(srcAbsolute).reduce((entries, publicModulePath) => [ + ...entries, { + publicModulePath, + exports_: getPublicExportedNames(esmRequire(publicModulePath, { warnings: false })) + } + ], []); + + await mkdirp(out); + + await Promise.all([ + fs.promises.writeFile(externs, generateExternsFile(exportedImports)), + fs.promises.writeFile(entry_point, generateUMDExportAssignnent(srcAbsolute, exportedImports)) + ]); + + return await Promise.all([ + 
runClosureCompileAsObservable().toPromise(), + compileBinFiles(target, format).toPromise() + ]); + + function runClosureCompileAsObservable() { + return observableFromStreams( + gulp.src([ + /* external libs first */ + `node_modules/flatbuffers/package.json`, + `node_modules/flatbuffers/js/flatbuffers.mjs`, + `node_modules/text-encoding-utf-8/package.json`, + `node_modules/text-encoding-utf-8/src/encoding.js`, + `${src}/**/*.js` /* <-- then source globs */ + ], { base: `./` }), + sourcemaps.init(), + closureCompiler(createClosureArgs(entry_point, externs)), + // rename the sourcemaps from *.js.map files to *.min.js.map + sourcemaps.write(`.`, { mapFile: (mapPath) => mapPath.replace(`.js.map`, `.${target}.min.js.map`) }), + gulp.dest(out) + ); + } }))({}); -const createClosureArgs = (entry, externs) => ({ +module.exports = closureTask; +module.exports.closureTask = closureTask; + +const createClosureArgs = (entry_point, externs) => ({ externs, + entry_point, third_party: true, warning_level: `QUIET`, dependency_mode: `STRICT`, rewrite_polyfills: false, - entry_point: `${entry}.js`, module_resolution: `NODE`, // formatting: `PRETTY_PRINT`, // debug: true, @@ -72,10 +103,99 @@ const createClosureArgs = (entry, externs) => ({ package_json_entry_names: `module,jsnext:main,main`, assume_function_wrapper: true, js_output_file: `${mainExport}.js`, - language_in: gCCLanguageNames[`es2015`], + language_in: gCCLanguageNames[`esnext`], language_out: gCCLanguageNames[`es5`], - output_wrapper: -`// Licensed to the Apache Software Foundation (ASF) under one + output_wrapper:`${apacheHeader()} +(function (global, factory) { + typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) : + typeof define === 'function' && define.amd ? define(['Arrow'], factory) : + (factory(global.Arrow = global.Arrow || {})); +}(this, (function (exports) {%output%}.bind(this))));` +}); + +function generateUMDExportAssignnent(src, exportedImports) { + return [ + ...exportedImports.map(({ publicModulePath }, i) => { + const p = publicModulePath.slice(src.length + 1); + return (`import * as exports${i} from './${p}';`); + }).filter(Boolean), + 'Object.assign(arguments[0], exports0);' + ].join('\n'); +} + +function generateExternsFile(exportedImports) { + return [ + externsHeader(), + ...exportedImports.reduce((externBodies, { exports_ }) => [ + ...externBodies, ...exports_.map(externBody) + ], []).filter(Boolean) + ].join('\n'); +} + +function externBody({ exportName, staticNames, instanceNames }) { + return [ + `var ${exportName} = function() {};`, + staticNames.map((staticName) => (isNaN(+staticName) + ? `/** @type {?} */\n${exportName}.${staticName} = function() {};` + : `/** @type {?} */\n${exportName}[${staticName}] = function() {};` + )).join('\n'), + instanceNames.map((instanceName) => (isNaN(+instanceName) + ? 
`/** @type {?} */\n${exportName}.prototype.${instanceName};` + : `/** @type {?} */\n${exportName}.prototype[${instanceName}];` + )).join('\n') + ].filter(Boolean).join('\n'); +} + +function externsHeader() { + return (`${apacheHeader()} +// @ts-nocheck +/* tslint:disable */ +/** + * @fileoverview Closure Compiler externs for Arrow + * @externs + * @suppress {duplicate,checkTypes} + */ +/** @type {symbol} */ +Symbol.iterator; +/** @type {symbol} */ +Symbol.toPrimitive; +/** @type {symbol} */ +Symbol.asyncIterator; +`); +} + +function getPublicExportedNames(entryModule) { + const fn = function() {}; + const isStaticOrProtoName = (x) => ( + !(x in fn) && + (x !== `default`) && + (x !== `undefined`) && + (x !== `__esModule`) && + (x !== `constructor`) && + !(x.startsWith('_')) + ); + return Object + .getOwnPropertyNames(entryModule) + .filter((name) => name !== 'default') + .filter((name) => ( + typeof entryModule[name] === `object` || + typeof entryModule[name] === `function` + )) + .map((name) => [name, entryModule[name]]) + .reduce((reserved, [name, value]) => { + + const staticNames = value && + typeof value === 'object' ? Object.getOwnPropertyNames(value).filter(isStaticOrProtoName) : + typeof value === 'function' ? Object.getOwnPropertyNames(value).filter(isStaticOrProtoName) : []; + + const instanceNames = (typeof value === `function` && Object.getOwnPropertyNames(value.prototype || {}) || []).filter(isStaticOrProtoName); + + return [...reserved, { exportName: name, staticNames, instanceNames }]; + }, []); +} + +function apacheHeader() { + return `// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file @@ -90,13 +210,5 @@ const createClosureArgs = (entry, externs) => ({ // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations -// under the License. -(function (global, factory) { - typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) : - typeof define === 'function' && define.amd ? define(['exports'], factory) : - (factory(global.Arrow = global.Arrow || {})); -}(this, (function (exports) {%output%}.bind(this))));` -}); - -module.exports = closureTask; -module.exports.closureTask = closureTask; +// under the License.` +} diff --git a/js/gulp/build-task.js b/js/gulp/compile-task.js similarity index 90% rename from js/gulp/build-task.js rename to js/gulp/compile-task.js index 9f3402cdd3508..60e2ebbe36a93 100644 --- a/js/gulp/build-task.js +++ b/js/gulp/compile-task.js @@ -24,7 +24,7 @@ const closureTask = require('./closure-task'); const typescriptTask = require('./typescript-task'); const { arrowTask, arrowTSTask } = require('./arrow-task'); -const buildTask = ((cache) => memoizeTask(cache, function build(target, format, ...args) { +const compileTask = ((cache) => memoizeTask(cache, function compile(target, format, ...args) { return target === `src` ? Observable.empty() : target === npmPkgName ? arrowTask(target, format, ...args)() : target === `ts` ? 
arrowTSTask(target, format, ...args)() @@ -33,5 +33,5 @@ const buildTask = ((cache) => memoizeTask(cache, function build(target, format, : typescriptTask(target, format, ...args)(); }))({}); -module.exports = buildTask; -module.exports.buildTask = buildTask; +module.exports = compileTask; +module.exports.compileTask = compileTask; diff --git a/js/gulp/memoize-task.js b/js/gulp/memoize-task.js index 0b0fc843c451a..408ee3b8839db 100644 --- a/js/gulp/memoize-task.js +++ b/js/gulp/memoize-task.js @@ -17,6 +17,13 @@ const { taskName } = require('./util'); +const createTask = ((taskFn) => ((target, format, ...args) => { + // Give the memoized fn a displayName so gulp's output is easier to follow. + const fn = () => taskFn(target, format, ...args); + fn.displayName = `${taskFn.name || ``}:${taskName(target, format, ...args)}:task`; + return fn; +})); + const memoizeTask = ((cache, taskFn) => ((target, format, ...args) => { // Give the memoized fn a displayName so gulp's output is easier to follow. const fn = () => ( @@ -27,4 +34,5 @@ const memoizeTask = ((cache, taskFn) => ((target, format, ...args) => { })); module.exports = memoizeTask; -module.exports.memoizeTask = memoizeTask; \ No newline at end of file +module.exports.createTask = createTask; +module.exports.memoizeTask = memoizeTask; diff --git a/js/gulp/minify-task.js b/js/gulp/minify-task.js index 82145aa90861a..81cb5e5f3f536 100644 --- a/js/gulp/minify-task.js +++ b/js/gulp/minify-task.js @@ -18,10 +18,10 @@ const { targetDir, mainExport, - ESKeywords, UMDSourceTargets, terserLanguageNames, - observableFromStreams + shouldRunInChildProcess, + spawnGulpCommandInChildProcess, } = require('./util'); const path = require('path'); @@ -30,41 +30,24 @@ const { memoizeTask } = require('./memoize-task'); const { compileBinFiles } = require('./typescript-task'); const { Observable, ReplaySubject } = require('rxjs'); const TerserPlugin = require(`terser-webpack-plugin`); -const esmRequire = require(`@std/esm`)(module, { - mode: `js`, - warnings: false, - cjs: { - /* A boolean for storing ES modules in require.cache. */ - cache: true, - /* A boolean for respecting require.extensions in ESM. */ - extensions: true, - /* A boolean for __esModule interoperability. */ - interop: true, - /* A boolean for importing named exports of CJS modules. */ - namedExports: true, - /* A boolean for following CJS path rules in ESM. */ - paths: true, - /* A boolean for __dirname, __filename, and require in ESM. 
*/ - vars: true, - } -}); const minifyTask = ((cache, commonConfig) => memoizeTask(cache, function minifyJS(target, format) { + if (shouldRunInChildProcess(target, format)) { + return spawnGulpCommandInChildProcess('compile', target, format); + } + const sourceTarget = UMDSourceTargets[target]; - const PublicNames = reservePublicNames(sourceTarget, `cls`); const out = targetDir(target, format), src = targetDir(sourceTarget, `cls`); const targetConfig = { ...commonConfig, output: { ...commonConfig.output, path: path.resolve(`./${out}`) } }; - const webpackConfigs = [ - [mainExport, PublicNames] - ].map(([entry, reserved]) => ({ + const webpackConfigs = [mainExport].map((entry) => ({ ...targetConfig, name: entry, - entry: { [entry]: path.resolve(`${src}/${entry}.js`) }, + entry: { [entry]: path.resolve(`${src}/${entry}.dom.js`) }, plugins: [ ...(targetConfig.plugins || []), new webpack.SourceMapDevToolPlugin({ @@ -73,20 +56,23 @@ const minifyTask = ((cache, commonConfig) => memoizeTask(cache, function minifyJ resourcePath .replace(/\s/, `_`) .replace(/\.\/node_modules\//, ``) - }), - new TerserPlugin({ - sourceMap: true, - terserOptions: { - ecma: terserLanguageNames[target], - compress: { unsafe: true }, - output: { comments: false, beautify: false }, - mangle: { eval: true, - properties: { reserved, keep_quoted: true } - }, - safari10: true // <-- works around safari10 bugs, see the "safari10" option here: https://github.com/terser-js/terser#minify-options - }, }) - ] + ], + optimization: { + minimize: true, + minimizer: [ + new TerserPlugin({ + sourceMap: true, + terserOptions: { + ecma: terserLanguageNames[target], + output: { comments: false, beautify: false }, + compress: { unsafe: true }, + mangle: true, + safari10: true // <-- works around safari10 bugs, see the "safari10" option here: https://github.com/terser-js/terser#minify-options + }, + }) + ] + } })); const compilers = webpack(webpackConfigs); @@ -102,42 +88,3 @@ const minifyTask = ((cache, commonConfig) => memoizeTask(cache, function minifyJ module.exports = minifyTask; module.exports.minifyTask = minifyTask; - -const reservePublicNames = ((ESKeywords) => function reservePublicNames(target, format) { - const src = targetDir(target, format); - const publicModulePaths = [ - `../${src}/data.js`, - `../${src}/type.js`, - `../${src}/table.js`, - `../${src}/vector.js`, - `../${src}/util/int.js`, - `../${src}/predicate.js`, - `../${src}/recordbatch.js`, - `../${src}/${mainExport}.js`, - ]; - return publicModulePaths.reduce((keywords, publicModulePath) => [ - ...keywords, ...reserveExportedNames(esmRequire(publicModulePath, { warnings: false })) - ], [...ESKeywords]); -})(ESKeywords); - -// Reflect on the Arrow modules to come up with a list of keys to save from -// Terser's -// mangler. Assume all the non-inherited static and prototype members of the Arrow -// module and its direct exports are public, and should be preserved through minification. 
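Before the removed implementation below, a self-contained sketch of the reflection idea this comment describes: walk a module's exports and collect the non-inherited static and prototype member names so they can feed a minifier's reserved-name list.

// Illustrative only; mirrors the removed reserveExportedNames helper
// that follows. `entryModule` is any imported namespace object.
function collectReservedNames(entryModule) {
    const fn = function () {};
    const isPublic = (x) => !(x in fn) && x !== `default` && x !== `undefined`;
    return Object.getOwnPropertyNames(entryModule)
        .filter((name) => typeof entryModule[name] === `object` ||
                          typeof entryModule[name] === `function`)
        .reduce((reserved, name) => {
            const value = entryModule[name];
            const ownKeys = value && typeof value === `object` ? Object.getOwnPropertyNames(value) : [];
            const protoKeys = typeof value === `function` ? Object.getOwnPropertyNames(value.prototype || {}) : [];
            return [...reserved, name, ...[...ownKeys, ...protoKeys].filter(isPublic)];
        }, []);
}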
-const reserveExportedNames = (entryModule) => ( - Object - .getOwnPropertyNames(entryModule) - .filter((name) => ( - typeof entryModule[name] === `object` || - typeof entryModule[name] === `function` - )) - .map((name) => [name, entryModule[name]]) - .reduce((reserved, [name, value]) => { - const fn = function() {}; - const ownKeys = value && typeof value === 'object' && Object.getOwnPropertyNames(value) || []; - const protoKeys = typeof value === `function` && Object.getOwnPropertyNames(value.prototype || {}) || []; - const publicNames = [...ownKeys, ...protoKeys].filter((x) => x !== `default` && x !== `undefined` && !(x in fn)); - return [...reserved, name, ...publicNames]; - }, [] - ) -); diff --git a/js/gulp/package-task.js b/js/gulp/package-task.js index 8c0f8fb0e4767..2a67c812206ce 100644 --- a/js/gulp/package-task.js +++ b/js/gulp/package-task.js @@ -46,17 +46,19 @@ const createMainPackageJson = (target, format) => (orig) => ({ ...createTypeScriptPackageJson(target, format)(orig), bin: orig.bin, name: npmPkgName, - main: mainExport, - types: `${mainExport}.d.ts`, - module: `${mainExport}.mjs`, + main: `${mainExport}.node`, + browser: `${mainExport}.dom`, + types: `${mainExport}.node.d.ts`, unpkg: `${mainExport}.es5.min.js`, - [`@std/esm`]: { mode: `all`, warnings: false, sourceMap: true } + [`esm`]: { mode: `all`, sourceMap: true } }); const createTypeScriptPackageJson = (target, format) => (orig) => ({ ...createScopedPackageJSON(target, format)(orig), - main: `${mainExport}.ts`, types: `${mainExport}.ts`, bin: undefined, + main: `${mainExport}.node.ts`, + types: `${mainExport}.node.ts`, + browser: `${mainExport}.dom.ts`, dependencies: { '@types/flatbuffers': '*', '@types/node': '*', @@ -70,8 +72,10 @@ const createScopedPackageJSON = (target, format) => (({ name, ...orig }) => (xs, key) => ({ ...xs, [key]: xs[key] || orig[key] }), { name: `${npmOrgName}/${packageName(target, format)}`, - version: undefined, main: `${mainExport}.js`, types: `${mainExport}.d.ts`, - unpkg: undefined, module: undefined, [`@std/esm`]: undefined + browser: format === 'umd' ? undefined : `${mainExport}.dom`, + main: format === 'umd' ? `${mainExport}` : `${mainExport}.node`, + types: format === 'umd' ? undefined : `${mainExport}.node.d.ts`, + version: undefined, unpkg: undefined, module: undefined, [`esm`]: undefined, } ) ) @@ -80,6 +84,5 @@ const createScopedPackageJSON = (target, format) => (({ name, ...orig }) => const conditionallyAddStandardESMEntry = (target, format) => (packageJSON) => ( format !== `esm` && format !== `cls` ? 
packageJSON - : { ...packageJSON, [`@std/esm`]: { mode: `js`, warnings: false, sourceMap: true } } + : { ...packageJSON, [`esm`]: { mode: `auto`, sourceMap: true } } ); - \ No newline at end of file diff --git a/js/gulp/test-task.js b/js/gulp/test-task.js index b0e34f8c94426..c7ad7d513c652 100644 --- a/js/gulp/test-task.js +++ b/js/gulp/test-task.js @@ -20,44 +20,47 @@ const path = require('path'); const { argv } = require('./argv'); const { promisify } = require('util'); const glob = promisify(require('glob')); -const stat = promisify(require('fs').stat); const mkdirp = promisify(require('mkdirp')); const rimraf = promisify(require('rimraf')); const child_process = require(`child_process`); const { memoizeTask } = require('./memoize-task'); const readFile = promisify(require('fs').readFile); +const asyncDone = promisify(require('async-done')); const exec = promisify(require('child_process').exec); const parseXML = promisify(require('xml2js').parseString); const jestArgv = []; -argv.update && jestArgv.push(`-u`); argv.verbose && jestArgv.push(`--verbose`); -argv.coverage && jestArgv.push(`--coverage`); +argv.coverage + ? jestArgv.push(`-c`, `jest.coverage.config.js`, `--coverage`) + : jestArgv.push(`-c`, `jest.config.js`, `-i`) -const debugArgv = [`--runInBand`, `--env`, `node-debug`]; -const jest = require.resolve(path.join(`..`, `node_modules`, `.bin`, `jest`)); +const jest = path.join(path.parse(require.resolve(`jest`)).dir, `../bin/jest.js`); const testOptions = { - env: { ...process.env }, stdio: [`ignore`, `inherit`, `inherit`], + env: { + ...process.env, + // hide fs.promises/stream[Symbol.asyncIterator] warnings + NODE_NO_WARNINGS: `1`, + // prevent the user-land `readable-stream` module from + // patching node's streams -- they're better now + READABLE_STREAM: `disable` + }, }; -const testTask = ((cache, execArgv, testOptions) => memoizeTask(cache, function test(target, format, debug = false) { +const testTask = ((cache, execArgv, testOptions) => memoizeTask(cache, function test(target, format) { const opts = { ...testOptions }; - const args = !debug ? [...execArgv] : [...debugArgv, ...execArgv]; - if (!argv.coverage) { - args.push(`test/${argv.integration ? `integration/*` : `unit/*`}`); - } - opts.env = { ...opts.env, + const args = [...execArgv, `test/unit/`]; + opts.env = { + ...opts.env, TEST_TARGET: target, TEST_MODULE: format, - TEST_TS_SOURCE: !!argv.coverage || (target === 'src') || (opts.env.TEST_TS_SOURCE === 'true'), - JSON_PATHS: JSON.stringify(Array.isArray(argv.json_files) ? argv.json_files : [argv.json_files]), - ARROW_PATHS: JSON.stringify(Array.isArray(argv.arrow_files) ? argv.arrow_files : [argv.arrow_files]), + TEST_DOM_STREAMS: (target ==='src' || format === 'umd').toString(), + TEST_NODE_STREAMS: (target ==='src' || format !== 'umd').toString(), + TEST_TS_SOURCE: !!argv.coverage || (target === 'src') || (opts.env.TEST_TS_SOURCE === 'true') }; - return !debug ? 
- child_process.spawn(jest, args, opts) : - child_process.exec(`node --inspect-brk ${jest} ${args.join(` `)}`, opts); -}))({}, jestArgv, testOptions); + return asyncDone(() => child_process.spawn(`node`, args, opts)); +}))({}, [jest, ...jestArgv], testOptions); module.exports = testTask; module.exports.testTask = testTask; @@ -69,9 +72,9 @@ const ARROW_HOME = process.env.ARROW_HOME || path.resolve('../'); const ARROW_JAVA_DIR = process.env.ARROW_JAVA_DIR || path.join(ARROW_HOME, 'java'); const CPP_EXE_PATH = process.env.ARROW_CPP_EXE_PATH || path.join(ARROW_HOME, 'cpp/build/debug'); const ARROW_INTEGRATION_DIR = process.env.ARROW_INTEGRATION_DIR || path.join(ARROW_HOME, 'integration'); -const CPP_JSON_TO_ARROW = path.join(CPP_EXE_PATH, 'json-integration-test'); -const CPP_STREAM_TO_FILE = path.join(CPP_EXE_PATH, 'stream-to-file'); -const CPP_FILE_TO_STREAM = path.join(CPP_EXE_PATH, 'file-to-stream'); +const CPP_JSON_TO_ARROW = path.join(CPP_EXE_PATH, 'arrow-json-integration-test'); +const CPP_STREAM_TO_FILE = path.join(CPP_EXE_PATH, 'arrow-stream-to-file'); +const CPP_FILE_TO_STREAM = path.join(CPP_EXE_PATH, 'arrow-file-to-stream'); const testFilesDir = path.join(ARROW_HOME, 'js/test/data'); const snapshotsDir = path.join(ARROW_HOME, 'js/test/__snapshots__'); diff --git a/js/gulp/typescript-task.js b/js/gulp/typescript-task.js index beffab8a08ce0..fe694cac860b3 100644 --- a/js/gulp/typescript-task.js +++ b/js/gulp/typescript-task.js @@ -16,19 +16,26 @@ // under the License. const { - targetDir, tsconfigName, observableFromStreams + targetDir, + tsconfigName, + observableFromStreams, + shouldRunInChildProcess, + spawnGulpCommandInChildProcess, } = require('./util'); -const del = require('del'); const gulp = require('gulp'); const path = require('path'); const ts = require(`gulp-typescript`); -const gulpRename = require(`gulp-rename`); const sourcemaps = require('gulp-sourcemaps'); const { memoizeTask } = require('./memoize-task'); const { Observable, ReplaySubject } = require('rxjs'); const typescriptTask = ((cache) => memoizeTask(cache, function typescript(target, format) { + + if (shouldRunInChildProcess(target, format)) { + return spawnGulpCommandInChildProcess('compile', target, format); + } + const out = targetDir(target, format); const tsconfigPath = path.join(`tsconfig`, `tsconfig.${tsconfigName(target, format)}.json`); return compileTypescript(out, tsconfigPath) @@ -39,11 +46,11 @@ const typescriptTask = ((cache) => memoizeTask(cache, function typescript(target function compileBinFiles(target, format) { const out = targetDir(target, format); const tsconfigPath = path.join(`tsconfig`, `tsconfig.${tsconfigName('bin', 'cjs')}.json`); - return compileTypescript(path.join(out, 'bin'), tsconfigPath); + return compileTypescript(path.join(out, 'bin'), tsconfigPath, { target }); } -function compileTypescript(out, tsconfigPath) { - const tsProject = ts.createProject(tsconfigPath, { typescript: require(`typescript`) }); +function compileTypescript(out, tsconfigPath, tsconfigOverrides) { + const tsProject = ts.createProject(tsconfigPath, { typescript: require(`typescript`), ...tsconfigOverrides }); const { stream: { js, dts } } = observableFromStreams( tsProject.src(), sourcemaps.init(), tsProject(ts.reporter.defaultReporter()) diff --git a/js/gulp/util.js b/js/gulp/util.js index 12d21b0e16be2..bd87684a1dc3d 100644 --- a/js/gulp/util.js +++ b/js/gulp/util.js @@ -17,8 +17,11 @@ const fs = require('fs'); const path = require(`path`); -const pump = require(`pump`); +const pump = 
require(`stream`).pipeline; +const child_process = require(`child_process`); +const { targets, modules } = require('./argv'); const { Observable, ReplaySubject } = require('rxjs'); +const asyncDone = require('util').promisify(require('async-done')); const mainExport = `Arrow`; const npmPkgName = `apache-arrow`; @@ -29,7 +32,7 @@ const knownTargets = [`es5`, `es2015`, `esnext`]; const knownModules = [`cjs`, `esm`, `cls`, `umd`]; const tasksToSkipPerTargetOrFormat = { src: { clean: true, build: true }, - cls: { test: true, integration: true } + cls: { test: true, package: true } }; const packageJSONFields = [ `version`, `license`, `description`, @@ -66,7 +69,7 @@ const UMDSourceTargets = { es2015: `es2015`, es2016: `es2015`, es2017: `es2015`, - esnext: `es2015` + esnext: `esnext` }; const terserLanguageNames = { @@ -109,12 +112,27 @@ function targetDir(target, format) { return path.join(releasesRootDir, ...(!format ? [target] : [target, format])); } -function logAndDie(e) { - if (e) { - process.exit(1); - } +function shouldRunInChildProcess(target, format) { + // If we're building more than one module/target, then yes run this task in a child process + if (targets.length > 1 || modules.length > 1) { return true; } + // If the target we're building *isn't* the target the gulp command was configured to run, then yes run that in a child process + if (targets[0] !== target || modules[0] !== format) { return true; } + // Otherwise no need -- either gulp was run for just one target, or we've been spawned as the child of a multi-target parent gulp + return false; +} + +const gulp = path.join(path.parse(require.resolve(`gulp`)).dir, `bin/gulp.js`); +function spawnGulpCommandInChildProcess(command, target, format) { + const args = [gulp, command, '-t', target, '-m', format, `--silent`]; + const opts = { + stdio: [`ignore`, `inherit`, `inherit`], + env: { ...process.env, NODE_NO_WARNINGS: `1` } + }; + return asyncDone(() => child_process.spawn(`node`, args, opts)) + .catch((e) => { throw { message: `${command}:${taskName(target, format)}` }; }); } +const logAndDie = (e) => { if (e) { process.exit(1); } }; function observableFromStreams(...streams) { if (streams.length <= 0) { return Observable.empty(); } const pumped = streams.length <= 1 ? streams[0] : pump(...streams, logAndDie); @@ -164,12 +182,37 @@ function* combinations(_targets, _modules) { } } +const publicModulePaths = (dir) => [ + `${dir}/${mainExport}.dom.js`, + `${dir}/util/int.js`, + `${dir}/compute/predicate.js`, +]; + +const esmRequire = require(`esm`)(module, { + mode: `auto`, + cjs: { + /* A boolean for storing ES modules in require.cache. */ + cache: true, + /* A boolean for respecting require.extensions in ESM. */ + extensions: true, + /* A boolean for __esModule interoperability. */ + interop: true, + /* A boolean for importing named exports of CJS modules. */ + namedExports: true, + /* A boolean for following CJS path rules in ESM. */ + paths: true, + /* A boolean for __dirname, __filename, and require in ESM. 
*/ + vars: true, + } +}); + module.exports = { mainExport, npmPkgName, npmOrgName, metadataFiles, packageJSONFields, knownTargets, knownModules, tasksToSkipPerTargetOrFormat, - ESKeywords, gCCLanguageNames, UMDSourceTargets, terserLanguageNames, + gCCLanguageNames, UMDSourceTargets, terserLanguageNames, taskName, packageName, tsconfigName, targetDir, combinations, observableFromStreams, + ESKeywords, publicModulePaths, esmRequire, shouldRunInChildProcess, spawnGulpCommandInChildProcess }; diff --git a/js/gulpfile.js b/js/gulpfile.js index 78aaa17ddb8b4..37c1d187995d2 100644 --- a/js/gulpfile.js +++ b/js/gulpfile.js @@ -17,17 +17,15 @@ const del = require('del'); const gulp = require('gulp'); -const path = require('path'); const { Observable } = require('rxjs'); -const buildTask = require('./gulp/build-task'); const cleanTask = require('./gulp/clean-task'); +const compileTask = require('./gulp/compile-task'); const packageTask = require('./gulp/package-task'); const { targets, modules } = require('./gulp/argv'); const { testTask, createTestData, cleanTestData } = require('./gulp/test-task'); const { - targetDir, taskName, combinations, - knownTargets, + targetDir, knownTargets, npmPkgName, UMDSourceTargets, tasksToSkipPerTargetOrFormat } = require('./gulp/util'); @@ -36,63 +34,60 @@ for (const [target, format] of combinations([`all`], [`all`])) { const task = taskName(target, format); gulp.task(`clean:${task}`, cleanTask(target, format)); gulp.task( `test:${task}`, testTask(target, format)); - gulp.task(`debug:${task}`, testTask(target, format, true)); - gulp.task(`build:${task}`, gulp.series(`clean:${task}`, - buildTask(target, format), - packageTask(target, format))); + gulp.task(`compile:${task}`, compileTask(target, format)); + gulp.task(`package:${task}`, packageTask(target, format)); + gulp.task(`build:${task}`, gulp.series( + `clean:${task}`, `compile:${task}`, `package:${task}` + )); } // The UMD bundles build temporary es5/6/next targets via TS, // then run the TS source through either closure-compiler or // a minifier, so we special case that here. 
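To make that special-casing concrete before the rewritten block below: with UMDSourceTargets now mapping esnext to esnext, the new graph expands the esnext UMD build roughly as follows (task-name formatting assumes taskName() joins target and format with a colon, as the `build:${taskName(...)}` usages elsewhere in this file suggest):

// Approximate expansion of the series built by the knownTargets.forEach
// below, for target = `esnext`.
gulp.task(`build:esnext:umd`, gulp.series(
    `build:esnext:cls`,    // 1. compile the TS source into the temporary cls target
    `clean:esnext:umd`,    // 2. clean the real output dir
    `compile:esnext:umd`,  // 3. closure-compile / minify from the cls output
    `package:esnext:umd`,  // 4. write the scoped package.json
    function remove_closure_tmp_files() {
        return del(targetDir(`esnext`, `cls`)); // 5. drop the temporary cls build
    }
));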
-knownTargets.forEach((target) => - gulp.task(`build:${target}:umd`, - gulp.series( - gulp.parallel( - cleanTask(target, `umd`), - cleanTask(UMDSourceTargets[target], `cls`) - ), - buildTask(UMDSourceTargets[target], `cls`), - buildTask(target, `umd`), packageTask(target, `umd`) - ) - ) -); +knownTargets.forEach((target) => { + const umd = taskName(target, `umd`); + const cls = taskName(UMDSourceTargets[target], `cls`); + gulp.task(`build:${umd}`, gulp.series( + `build:${cls}`, + `clean:${umd}`, `compile:${umd}`, `package:${umd}`, + function remove_closure_tmp_files() { + return del(targetDir(target, `cls`)) + } + )); +}); // The main "apache-arrow" module builds the es5/umd, es2015/cjs, // es2015/esm, and es2015/umd targets, then copies and renames the // compiled output into the apache-arrow folder gulp.task(`build:${npmPkgName}`, gulp.series( - cleanTask(npmPkgName), gulp.parallel( `build:${taskName(`es5`, `umd`)}`, `build:${taskName(`es2015`, `cjs`)}`, `build:${taskName(`es2015`, `esm`)}`, `build:${taskName(`es2015`, `umd`)}` ), - buildTask(npmPkgName), packageTask(npmPkgName) + `clean:${npmPkgName}`, + `compile:${npmPkgName}`, + `package:${npmPkgName}` ) ); - -function gulpConcurrent(tasks) { - return () => Observable.bindCallback((tasks, cb) => gulp.parallel(tasks)(cb))(tasks); -} - -const buildConcurrent = (tasks) => () => - gulpConcurrent(tasks)() - .concat(Observable - .defer(() => Observable - .merge(...knownTargets.map((target) => - del(`${targetDir(target, `cls`)}/**`))))); - +// And finally the global composite tasks gulp.task(`clean:testdata`, cleanTestData); gulp.task(`create:testdata`, createTestData); -gulp.task(`test`, gulp.series(getTasks(`test`))); -gulp.task(`debug`, gulp.series(getTasks(`debug`))); +gulp.task(`test`, gulpConcurrent(getTasks(`test`))); gulp.task(`clean`, gulp.parallel(getTasks(`clean`))); -gulp.task(`build`, buildConcurrent(getTasks(`build`))); -gulp.task(`default`, gulp.series(`build`, `test`)); +gulp.task(`build`, gulpConcurrent(getTasks(`build`))); +gulp.task(`compile`, gulpConcurrent(getTasks(`compile`))); +gulp.task(`package`, gulpConcurrent(getTasks(`package`))); +gulp.task(`default`, gulp.series(`clean`, `build`, `test`)); + +function gulpConcurrent(tasks) { + const numCPUs = Math.max(1, require('os').cpus().length * 0.75) | 0; + return () => Observable.from(tasks.map((task) => gulp.series(task))) + .flatMap((task) => Observable.bindNodeCallback(task)(), numCPUs); +} function getTasks(name) { const tasks = []; diff --git a/js/index.ts b/js/index.ts index 51b8676abbd9d..cfd64bbbe9730 100644 --- a/js/index.ts +++ b/js/index.ts @@ -15,4 +15,4 @@ // specific language governing permissions and limitations // under the License. -export * from './src/Arrow'; \ No newline at end of file +export * from './src/Arrow.node'; \ No newline at end of file diff --git a/js/jest.config.js b/js/jest.config.js new file mode 100644 index 0000000000000..55028d09f969e --- /dev/null +++ b/js/jest.config.js @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +module.exports = { + "verbose": false, + "reporters": [ + "jest-silent-reporter" + ], + "testEnvironment": "node", + "globals": { + "ts-jest": { + "diagnostics": false, + "tsConfig": "test/tsconfig.json" + } + }, + "roots": [ + "/test/" + ], + "moduleFileExtensions": [ + "js", + "ts", + "tsx" + ], + "coverageReporters": [ + "lcov" + ], + "coveragePathIgnorePatterns": [ + "fb\\/(File|Message|Schema|Tensor)\\.(js|ts)$", + "test\\/.*\\.(ts|tsx|js)$", + "/node_modules/" + ], + "transform": { + "^.+\\.jsx?$": "ts-jest", + "^.+\\.tsx?$": "ts-jest" + }, + "transformIgnorePatterns": [ + "/node_modules/(?!web-stream-tools).+\\.js$" + ], + "testRegex": "(.*(-|\\.)(test|spec)s?)\\.(ts|tsx|js)$", + "preset": "ts-jest", + "testMatch": null +}; diff --git a/js/jest.coverage.config.js b/js/jest.coverage.config.js new file mode 100644 index 0000000000000..72ddd3c9345a0 --- /dev/null +++ b/js/jest.coverage.config.js @@ -0,0 +1,30 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
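One non-obvious detail feeding into these configs is how js/gulp/test-task.js above locates the jest CLI without relying on the node_modules/.bin shim; a sketch of that resolution (the build/jest.js main path is an assumption about jest's package layout, not shown in this diff):

// require.resolve(`jest`) returns jest's "main" entry, e.g.
// node_modules/jest/build/jest.js; stepping up one directory from its
// parent reaches the CLI at bin/jest.js.
const path = require('path');
const jest = path.join(path.parse(require.resolve(`jest`)).dir, `../bin/jest.js`);
// test-task then spawns it with the config selected above, e.g.:
//   node <jest> -c jest.config.js -i test/unit/
//   node <jest> -c jest.coverage.config.js --coverage test/unit/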
+ +module.exports = { + ...require('./jest.config'), + "reporters": undefined, + "coverageReporters": [ + "lcov", "json" + ], + "globals": { + "ts-jest": { + "diagnostics": false, + "tsConfig": "test/tsconfig.coverage.json" + } + } +}; diff --git a/js/npm-release.sh b/js/npm-release.sh index 3ef24d3e6f828..a52e25ed7884a 100755 --- a/js/npm-release.sh +++ b/js/npm-release.sh @@ -20,11 +20,7 @@ set -e # validate the targets pass all tests before publishing npm install -# npx run-s clean:all lint create:testdata build -# npm run test -- -t ts -u --integration -# npm run test -- --integration -npx run-s clean:all lint build -npm run test +npx gulp # publish the JS target modules to npm npx lerna exec -- npm publish diff --git a/js/package-lock.json b/js/package-lock.json index 1ab8bacc2269b..1dc65df427e9b 100644 --- a/js/package-lock.json +++ b/js/package-lock.json @@ -64,57 +64,56 @@ } }, "@lerna/add": { - "version": "3.4.1", - "resolved": "https://registry.npmjs.org/@lerna/add/-/add-3.4.1.tgz", - "integrity": "sha512-Vf54B42jlD6G52qnv/cAGH70cVQIa+LX//lfsbkxHvzkhIqBl5J4KsnTOPkA9uq3R+zP58ayicCHB9ReiEWGJg==", + "version": "3.10.6", + "resolved": "https://registry.npmjs.org/@lerna/add/-/add-3.10.6.tgz", + "integrity": "sha512-FxQ5Bmyb5fF+3BQiNffM6cTeGCrl4uaAuGvxFIWF6Pgz6U14tUc1e16xgKDvVb1CurzJgIV5sLOT5xmCOqv1kA==", "dev": true, "requires": { - "@lerna/bootstrap": "^3.4.1", - "@lerna/command": "^3.3.0", - "@lerna/filter-options": "^3.3.2", - "@lerna/npm-conf": "^3.4.1", - "@lerna/validation-error": "^3.0.0", + "@lerna/bootstrap": "3.10.6", + "@lerna/command": "3.10.6", + "@lerna/filter-options": "3.10.6", + "@lerna/npm-conf": "3.7.0", + "@lerna/validation-error": "3.6.0", "dedent": "^0.7.0", - "npm-package-arg": "^6.0.0", + "libnpm": "^2.0.1", "p-map": "^1.2.0", - "pacote": "^9.1.0", "semver": "^5.5.0" } }, "@lerna/batch-packages": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/@lerna/batch-packages/-/batch-packages-3.1.2.tgz", - "integrity": "sha512-HAkpptrYeUVlBYbLScXgeCgk6BsNVXxDd53HVWgzzTWpXV4MHpbpeKrByyt7viXlNhW0w73jJbipb/QlFsHIhQ==", + "version": "3.10.6", + "resolved": "https://registry.npmjs.org/@lerna/batch-packages/-/batch-packages-3.10.6.tgz", + "integrity": "sha512-sInr3ZQJFMh9Zq+ZUoVjX8R67j9ViRkVy0uEMsOfG+jZlXj1lRPRMPRiRgU0jXSYEwCdwuAB5pTd9tTx0VCJUw==", "dev": true, "requires": { - "@lerna/package-graph": "^3.1.2", - "@lerna/validation-error": "^3.0.0", - "npmlog": "^4.1.2" + "@lerna/package-graph": "3.10.6", + "@lerna/validation-error": "3.6.0", + "libnpm": "^2.0.1" } }, "@lerna/bootstrap": { - "version": "3.4.1", - "resolved": "https://registry.npmjs.org/@lerna/bootstrap/-/bootstrap-3.4.1.tgz", - "integrity": "sha512-yZDJgNm/KDoRH2klzmQGmpWMg/XMzWgeWvauXkrfW/mj1wwmufOuh5pN4fBFxVmUUa/RFZdfMeaaJt3+W3PPBw==", - "dev": true, - "requires": { - "@lerna/batch-packages": "^3.1.2", - "@lerna/command": "^3.3.0", - "@lerna/filter-options": "^3.3.2", - "@lerna/has-npm-version": "^3.3.0", - "@lerna/npm-conf": "^3.4.1", - "@lerna/npm-install": "^3.3.0", - "@lerna/rimraf-dir": "^3.3.0", - "@lerna/run-lifecycle": "^3.4.1", - "@lerna/run-parallel-batches": "^3.0.0", - "@lerna/symlink-binary": "^3.3.0", - "@lerna/symlink-dependencies": "^3.3.0", - "@lerna/validation-error": "^3.0.0", + "version": "3.10.6", + "resolved": "https://registry.npmjs.org/@lerna/bootstrap/-/bootstrap-3.10.6.tgz", + "integrity": "sha512-qbGjAxRpV/eiI9CboUIpsPPGpSogs8mN2/iDaAUBTaWVFVz/YyU64nui84Gll0kbdaHOyPput+kk2S8NCSCCdg==", + "dev": true, + "requires": { + "@lerna/batch-packages": "3.10.6", + 
"@lerna/command": "3.10.6", + "@lerna/filter-options": "3.10.6", + "@lerna/has-npm-version": "3.10.0", + "@lerna/npm-install": "3.10.0", + "@lerna/package-graph": "3.10.6", + "@lerna/pulse-till-done": "3.7.1", + "@lerna/rimraf-dir": "3.10.0", + "@lerna/run-lifecycle": "3.10.5", + "@lerna/run-parallel-batches": "3.0.0", + "@lerna/symlink-binary": "3.10.0", + "@lerna/symlink-dependencies": "3.10.0", + "@lerna/validation-error": "3.6.0", "dedent": "^0.7.0", "get-port": "^3.2.0", + "libnpm": "^2.0.1", "multimatch": "^2.1.0", - "npm-package-arg": "^6.0.0", - "npmlog": "^4.1.2", "p-finally": "^1.0.0", "p-map": "^1.2.0", "p-map-series": "^1.0.0", @@ -124,26 +123,26 @@ } }, "@lerna/changed": { - "version": "3.4.1", - "resolved": "https://registry.npmjs.org/@lerna/changed/-/changed-3.4.1.tgz", - "integrity": "sha512-gT7fhl4zQWyGETDO4Yy5wsFnqNlBSsezncS1nkMW1uO6jwnolwYqcr1KbrMR8HdmsZBn/00Y0mRnbtbpPPey8w==", + "version": "3.10.6", + "resolved": "https://registry.npmjs.org/@lerna/changed/-/changed-3.10.6.tgz", + "integrity": "sha512-nZDVq/sKdhgoAg1BVnpqjqUUz5+zedG+AnU+6mjEN2f23YVtRCsW55N4I9eEdW2pxXUaCY85Hj/HPSA74BYaFg==", "dev": true, "requires": { - "@lerna/collect-updates": "^3.3.2", - "@lerna/command": "^3.3.0", - "@lerna/listable": "^3.0.0", - "@lerna/output": "^3.0.0", - "@lerna/version": "^3.4.1" + "@lerna/collect-updates": "3.10.1", + "@lerna/command": "3.10.6", + "@lerna/listable": "3.10.6", + "@lerna/output": "3.6.0", + "@lerna/version": "3.10.6" } }, "@lerna/check-working-tree": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/@lerna/check-working-tree/-/check-working-tree-3.3.0.tgz", - "integrity": "sha512-oeEP1dNhiiKUaO0pmcIi73YXJpaD0n5JczNctvVNZ8fGZmrALZtEnmC28o6Z7JgQaqq5nd2kO7xbnjoitrC51g==", + "version": "3.10.0", + "resolved": "https://registry.npmjs.org/@lerna/check-working-tree/-/check-working-tree-3.10.0.tgz", + "integrity": "sha512-NdIPhDgEtGHfeGjB9F0oAoPLywgMpjnJhLLwTNQkelDHo2xNAVpG8kV+A2UJ+cU5UXCZA4RZFxKNmw86rO+Drw==", "dev": true, "requires": { - "@lerna/describe-ref": "^3.3.0", - "@lerna/validation-error": "^3.0.0" + "@lerna/describe-ref": "3.10.0", + "@lerna/validation-error": "3.6.0" } }, "@lerna/child-process": { @@ -193,33 +192,44 @@ "requires": { "pump": "^3.0.0" } + }, + "pump": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", + "dev": true, + "requires": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } } } }, "@lerna/clean": { - "version": "3.3.2", - "resolved": "https://registry.npmjs.org/@lerna/clean/-/clean-3.3.2.tgz", - "integrity": "sha512-mvqusgSp2ou5SGqQgTEoTvGJpGfH4+L6XSeN+Ims+eNFGXuMazmKCf+rz2PZBMFufaHJ/Os+JF0vPCcWI1Fzqg==", + "version": "3.10.6", + "resolved": "https://registry.npmjs.org/@lerna/clean/-/clean-3.10.6.tgz", + "integrity": "sha512-MuL8HOwnyvVtr6GOiAN/Ofjbx+BJdCrtjrM1Uuh8FFnbnZTPVf+0MPxL2jVzPMo0PmoIrX3fvlwvzKNk/lH0Ug==", "dev": true, "requires": { - "@lerna/command": "^3.3.0", - "@lerna/filter-options": "^3.3.2", - "@lerna/prompt": "^3.3.1", - "@lerna/rimraf-dir": "^3.3.0", + "@lerna/command": "3.10.6", + "@lerna/filter-options": "3.10.6", + "@lerna/prompt": "3.6.0", + "@lerna/pulse-till-done": "3.7.1", + "@lerna/rimraf-dir": "3.10.0", "p-map": "^1.2.0", "p-map-series": "^1.0.0", "p-waterfall": "^1.0.0" } }, "@lerna/cli": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/@lerna/cli/-/cli-3.2.0.tgz", - "integrity": 
"sha512-JdbLyTxHqxUlrkI+Ke+ltXbtyA+MPu9zR6kg/n8Fl6uaez/2fZWtReXzYi8MgLxfUFa7+1OHWJv4eAMZlByJ+Q==", + "version": "3.10.7", + "resolved": "https://registry.npmjs.org/@lerna/cli/-/cli-3.10.7.tgz", + "integrity": "sha512-yuoz/24mIfYit3neKqoE5NVs42Rj9A6A6SlkNPDfsy3v/Vh7SgYkU3cwiGyvwBGzIdhqL4/SWYo8H7YJLs0C+g==", "dev": true, "requires": { - "@lerna/global-options": "^3.1.3", + "@lerna/global-options": "3.10.6", "dedent": "^0.7.0", - "npmlog": "^4.1.2", + "libnpm": "^2.0.1", "yargs": "^12.0.1" }, "dependencies": { @@ -230,9 +240,9 @@ "dev": true }, "camelcase": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-4.1.0.tgz", - "integrity": "sha1-1UVjW+HjPFQmScaRc+Xeas+uNN0=", + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-5.0.0.tgz", + "integrity": "sha512-faqwZqnWxbxn+F1d399ygeamQNy3lPp/H9H6rNrqYh4FSVCtcY+3cub1MxA8o9mDd55mM8Aghuu/kuyYA6VTsA==", "dev": true }, "cliui": { @@ -259,23 +269,14 @@ "which": "^1.2.9" } }, - "decamelize": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/decamelize/-/decamelize-2.0.0.tgz", - "integrity": "sha512-Ikpp5scV3MSYxY39ymh45ZLEecsTdv/Xj2CaQfI8RLMuwi7XvjX9H/fhraiSuU+C5w5NTDu4ZU72xNiZnurBPg==", - "dev": true, - "requires": { - "xregexp": "4.0.0" - } - }, "execa": { - "version": "0.10.0", - "resolved": "https://registry.npmjs.org/execa/-/execa-0.10.0.tgz", - "integrity": "sha512-7XOMnz8Ynx1gGo/3hyV9loYNPWM94jG3+3T3Y8tsfSstFmETmENCMU/A/zj8Lyaj1lkgEepKepvd6240tBRvlw==", + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/execa/-/execa-1.0.0.tgz", + "integrity": "sha512-adbxcyWV46qiHyvSp50TKt05tB4tK3HcmF7/nxfAdhnox83seTDbwnaqKO4sXRy7roHAIFqJP/Rw/AuEbX61LA==", "dev": true, "requires": { "cross-spawn": "^6.0.0", - "get-stream": "^3.0.0", + "get-stream": "^4.0.0", "is-stream": "^1.1.0", "npm-run-path": "^2.0.0", "p-finally": "^1.0.0", @@ -292,6 +293,15 @@ "locate-path": "^3.0.0" } }, + "get-stream": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-4.1.0.tgz", + "integrity": "sha512-GMat4EJ5161kIy2HevLlr4luNjBgvmj413KaQA7jt4V8B4RDsfpHk7WQ9GVqfYyyx8OS/L66Kox+rJRNklLK7w==", + "dev": true, + "requires": { + "pump": "^3.0.0" + } + }, "invert-kv": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/invert-kv/-/invert-kv-2.0.0.tgz", @@ -335,20 +345,20 @@ } }, "os-locale": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/os-locale/-/os-locale-3.0.1.tgz", - "integrity": "sha512-7g5e7dmXPtzcP4bgsZ8ixDVqA7oWYuEz4lOSujeWyliPai4gfVDiFIcwBg3aGCPnmSGfzOKTK3ccPn0CKv3DBw==", + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/os-locale/-/os-locale-3.1.0.tgz", + "integrity": "sha512-Z8l3R4wYWM40/52Z+S265okfFj8Kt2cC2MKY+xNi3kFs+XGI7WXu/I309QQQYbRW4ijiZ+yxs9pqEhJh0DqW3Q==", "dev": true, "requires": { - "execa": "^0.10.0", + "execa": "^1.0.0", "lcid": "^2.0.0", "mem": "^4.0.0" } }, "p-limit": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.0.0.tgz", - "integrity": "sha512-fl5s52lI5ahKCernzzIyAP0QAZbGIovtVHGwpcu1Jr/EpzLVDI2myISHwGqK7m8uQFugVWSrbxH7XnhGtvEc+A==", + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.1.0.tgz", + "integrity": "sha512-NhURkNcrVB+8hNfLuysU8enY5xn2KXphsHBaC2YmRNTZRc7RWusw6apSpdEj3jo4CMb6W9nrF6tTnsJsJeyu6g==", "dev": true, "requires": { "p-try": "^2.0.0" @@ -369,6 +379,22 @@ "integrity": "sha512-hMp0onDKIajHfIkdRk3P4CdCmErkYAxxDtP3Wx/4nZ3aGlau2VKh3mZpcuFkH27WQkL/3WBCPOktzA9ZOAnMQQ==", "dev": true }, + "path-exists": { + 
"version": "3.0.0", + "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-3.0.0.tgz", + "integrity": "sha1-zg6+ql94yxiSXqfYENe1mwEP1RU=", + "dev": true + }, + "pump": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", + "dev": true, + "requires": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } + }, "string-width": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/string-width/-/string-width-2.1.1.tgz", @@ -388,14 +414,20 @@ "ansi-regex": "^3.0.0" } }, + "which-module": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/which-module/-/which-module-2.0.0.tgz", + "integrity": "sha1-2e8H3Od7mQK4o6j6SzHD4/fm6Ho=", + "dev": true + }, "yargs": { - "version": "12.0.2", - "resolved": "https://registry.npmjs.org/yargs/-/yargs-12.0.2.tgz", - "integrity": "sha512-e7SkEx6N6SIZ5c5H22RTZae61qtn3PYUE8JYbBFlK9sYmh3DMQ6E5ygtaG/2BW0JZi4WGgTR2IV5ChqlqrDGVQ==", + "version": "12.0.5", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-12.0.5.tgz", + "integrity": "sha512-Lhz8TLaYnxq/2ObqHDql8dX8CJi97oHxrjUcYtzKbbykPtVW9WB+poxI+NM2UIzsMgNCZTIf0AQwsjK5yMAqZw==", "dev": true, "requires": { "cliui": "^4.0.0", - "decamelize": "^2.0.0", + "decamelize": "^1.2.0", "find-up": "^3.0.0", "get-caller-file": "^1.0.1", "os-locale": "^3.0.0", @@ -405,49 +437,50 @@ "string-width": "^2.0.0", "which-module": "^2.0.0", "y18n": "^3.2.1 || ^4.0.0", - "yargs-parser": "^10.1.0" + "yargs-parser": "^11.1.1" } }, "yargs-parser": { - "version": "10.1.0", - "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-10.1.0.tgz", - "integrity": "sha512-VCIyR1wJoEBZUqk5PA+oOBF6ypbwh5aNB3I50guxAL/quggdfs4TtNHQrSazFA3fYZ+tEqfs0zIGlv0c/rgjbQ==", + "version": "11.1.1", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-11.1.1.tgz", + "integrity": "sha512-C6kB/WJDiaxONLJQnF8ccx9SEeoTTLek8RVbaOIsrAUS8VrBEXfmeSnCZxygc+XC2sNMBIwOOnfcxiynjHsVSQ==", "dev": true, "requires": { - "camelcase": "^4.1.0" + "camelcase": "^5.0.0", + "decamelize": "^1.2.0" } } } }, "@lerna/collect-updates": { - "version": "3.3.2", - "resolved": "https://registry.npmjs.org/@lerna/collect-updates/-/collect-updates-3.3.2.tgz", - "integrity": "sha512-9WyBJI2S5sYgEZEScu525Lbi6nknNrdBKop35sCDIC9y6AIGvH6Dr5tkTd+Kg3n1dE+kHwW/xjERkx3+h7th3w==", + "version": "3.10.1", + "resolved": "https://registry.npmjs.org/@lerna/collect-updates/-/collect-updates-3.10.1.tgz", + "integrity": "sha512-vb0wEJ8k63G+2CR/ud1WeVHNJ21Fs6Ew6lbdGZXnF4ZvaFWxWJZpoHeWwzjhMdJ75QdTzUaIhTG1hnH9faQNMw==", "dev": true, "requires": { - "@lerna/child-process": "^3.3.0", - "@lerna/describe-ref": "^3.3.0", + "@lerna/child-process": "3.3.0", + "@lerna/describe-ref": "3.10.0", + "libnpm": "^2.0.1", "minimatch": "^3.0.4", - "npmlog": "^4.1.2", "slash": "^1.0.0" } }, "@lerna/command": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/@lerna/command/-/command-3.3.0.tgz", - "integrity": "sha512-NTOkLEKlWcBLHSvUr9tzVpV7RJ4GROLeOuZ6RfztGOW/31JPSwVVBD2kPifEXNZunldOx5GVWukR+7+NpAWhsg==", + "version": "3.10.6", + "resolved": "https://registry.npmjs.org/@lerna/command/-/command-3.10.6.tgz", + "integrity": "sha512-jPZswMZXOpAaIuSF5hrz+eaWQzbDrvwbrkCoRJKfiAHx7URAkE6MQe9DeAnqrTKMqwfg0RciSrZLc8kWYfrzCQ==", "dev": true, "requires": { - "@lerna/child-process": "^3.3.0", - "@lerna/package-graph": "^3.1.2", - "@lerna/project": "^3.0.0", - "@lerna/validation-error": "^3.0.0", - 
"@lerna/write-log-file": "^3.0.0", + "@lerna/child-process": "3.3.0", + "@lerna/package-graph": "3.10.6", + "@lerna/project": "3.10.0", + "@lerna/validation-error": "3.6.0", + "@lerna/write-log-file": "3.6.0", "dedent": "^0.7.0", "execa": "^1.0.0", "is-ci": "^1.0.10", - "lodash": "^4.17.5", - "npmlog": "^4.1.2" + "libnpm": "^2.0.1", + "lodash": "^4.17.5" }, "dependencies": { "cross-spawn": { @@ -486,23 +519,32 @@ "requires": { "pump": "^3.0.0" } + }, + "pump": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", + "dev": true, + "requires": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } } } }, "@lerna/conventional-commits": { - "version": "3.4.1", - "resolved": "https://registry.npmjs.org/@lerna/conventional-commits/-/conventional-commits-3.4.1.tgz", - "integrity": "sha512-3NETrA58aUkaEW3RdwdJ766Bg9NVpLzb26mtdlsJQcvB5sQBWH5dJSHIVQH1QsGloBeH2pE/mDUEVY8ZJXuR4w==", + "version": "3.10.0", + "resolved": "https://registry.npmjs.org/@lerna/conventional-commits/-/conventional-commits-3.10.0.tgz", + "integrity": "sha512-8FvO0eR8g/tEgkb6eRVYaD39TsqMKsOXp17EV48jciciEqcrF/d1Ypu6ilK1GDp6R/1m2mbjt/b52a/qrO+xaw==", "dev": true, "requires": { - "@lerna/validation-error": "^3.0.0", - "conventional-changelog-angular": "^5.0.1", - "conventional-changelog-core": "^3.1.0", - "conventional-recommended-bump": "^4.0.1", + "@lerna/validation-error": "3.6.0", + "conventional-changelog-angular": "^5.0.2", + "conventional-changelog-core": "^3.1.5", + "conventional-recommended-bump": "^4.0.4", "fs-extra": "^7.0.0", "get-stream": "^4.0.0", - "npm-package-arg": "^6.0.0", - "npmlog": "^4.1.2", + "libnpm": "^2.0.1", "semver": "^5.5.0" }, "dependencies": { @@ -514,25 +556,36 @@ "requires": { "pump": "^3.0.0" } + }, + "pump": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", + "dev": true, + "requires": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } } } }, "@lerna/create": { - "version": "3.4.1", - "resolved": "https://registry.npmjs.org/@lerna/create/-/create-3.4.1.tgz", - "integrity": "sha512-l+4t2SRO5nvW0MNYY+EWxbaMHsAN8bkWH3nyt7EzhBjs4+TlRAJRIEqd8o9NWznheE3pzwczFz1Qfl3BWbyM5A==", + "version": "3.10.6", + "resolved": "https://registry.npmjs.org/@lerna/create/-/create-3.10.6.tgz", + "integrity": "sha512-OddQtGBHM2/eJONggLWoTE6275XGbnJ6dIVF+fLsKS93o4GC6g+qcc6Y7lUWHm5bfpeOwNOVKwj0tvqBZ6MgoA==", "dev": true, "requires": { - "@lerna/child-process": "^3.3.0", - "@lerna/command": "^3.3.0", - "@lerna/npm-conf": "^3.4.1", - "@lerna/validation-error": "^3.0.0", + "@lerna/child-process": "3.3.0", + "@lerna/command": "3.10.6", + "@lerna/npm-conf": "3.7.0", + "@lerna/validation-error": "3.6.0", "camelcase": "^4.1.0", "dedent": "^0.7.0", "fs-extra": "^7.0.0", "globby": "^8.0.1", "init-package-json": "^1.10.3", - "npm-package-arg": "^6.0.0", + "libnpm": "^2.0.1", + "p-reduce": "^1.0.0", "pify": "^3.0.0", "semver": "^5.5.0", "slash": "^1.0.0", @@ -548,13 +601,13 @@ "dev": true }, "globby": { - "version": "8.0.1", - "resolved": "https://registry.npmjs.org/globby/-/globby-8.0.1.tgz", - "integrity": "sha512-oMrYrJERnKBLXNLVTqhm3vPEdJ/b2ZE28xN4YARiix1NOIOBPEpOUnm844K1iu/BkphCaf2WNFwMszv8Soi1pw==", + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/globby/-/globby-8.0.2.tgz", + "integrity": 
"sha512-yTzMmKygLp8RUpG1Ymu2VXPSJQZjNAZPD4ywgYEaG7e4tBJeUQBO8OpXrf1RCNcEs5alsoJYPAMiIHP0cmeC7w==", "dev": true, "requires": { "array-union": "^1.0.1", - "dir-glob": "^2.0.0", + "dir-glob": "2.0.0", "fast-glob": "^2.0.2", "glob": "^7.1.2", "ignore": "^3.3.5", @@ -576,178 +629,214 @@ } }, "@lerna/create-symlink": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/@lerna/create-symlink/-/create-symlink-3.3.0.tgz", - "integrity": "sha512-0lb88Nnq1c/GG+fwybuReOnw3+ah4dB81PuWwWwuqUNPE0n50qUf/M/7FfSb5JEh/93fcdbZI0La8t3iysNW1w==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@lerna/create-symlink/-/create-symlink-3.6.0.tgz", + "integrity": "sha512-YG3lTb6zylvmGqKU+QYA3ylSnoLn+FyLH5XZmUsD0i85R884+EyJJeHx/zUk+yrL2ZwHS4RBUgJfC24fqzgPoA==", "dev": true, "requires": { "cmd-shim": "^2.0.2", "fs-extra": "^7.0.0", - "npmlog": "^4.1.2" + "libnpm": "^2.0.1" } }, "@lerna/describe-ref": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/@lerna/describe-ref/-/describe-ref-3.3.0.tgz", - "integrity": "sha512-4t7M4OupnYMSPNLrLUau8qkS+dgLEi4w+DkRkV0+A+KNYga1W0jVgNLPIIsxta7OHfodPkCNAqZCzNCw/dmAwA==", + "version": "3.10.0", + "resolved": "https://registry.npmjs.org/@lerna/describe-ref/-/describe-ref-3.10.0.tgz", + "integrity": "sha512-fouh3FQS07QxJJp/mW8LkGnH0xMRAzpBlejtZaiRwfDkW2kd6EuHaj8I/2/p21Wsprcvuu4dqmyia2YS1xFb/w==", "dev": true, "requires": { - "@lerna/child-process": "^3.3.0", - "npmlog": "^4.1.2" + "@lerna/child-process": "3.3.0", + "libnpm": "^2.0.1" } }, "@lerna/diff": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/@lerna/diff/-/diff-3.3.0.tgz", - "integrity": "sha512-sIoMjsm3NVxvmt6ofx8Uu/2fxgldQqLl0zmC9X1xW00j831o5hBffx1EoKj9CnmaEvoSP6j/KFjxy2RWjebCIg==", + "version": "3.10.6", + "resolved": "https://registry.npmjs.org/@lerna/diff/-/diff-3.10.6.tgz", + "integrity": "sha512-0MqFhosjrqsIdXiKIu7t3CiJELqiU9mkjFBhYPB7JruAzpPwjMXJnC6/Ur5/7LXJYYVpqGQwZI9ZaZlOYJhhrw==", "dev": true, "requires": { - "@lerna/child-process": "^3.3.0", - "@lerna/command": "^3.3.0", - "@lerna/validation-error": "^3.0.0", - "npmlog": "^4.1.2" + "@lerna/child-process": "3.3.0", + "@lerna/command": "3.10.6", + "@lerna/validation-error": "3.6.0", + "libnpm": "^2.0.1" } }, "@lerna/exec": { - "version": "3.3.2", - "resolved": "https://registry.npmjs.org/@lerna/exec/-/exec-3.3.2.tgz", - "integrity": "sha512-mN6vGxNir7JOGvWLwKr3DW3LNy1ecCo2ziZj5rO9Mw5Rew3carUu1XLmhF/4judtsvXViUY+rvGIcqHe0vvb+w==", + "version": "3.10.6", + "resolved": "https://registry.npmjs.org/@lerna/exec/-/exec-3.10.6.tgz", + "integrity": "sha512-cdHqaRBMYceJu8rZLO8b4ZeR27O+xKPHgzi13OOOfBJQjrTuacjMWyHgmpy8jWc/0f7QnTl4VsHks7VJ3UK+vw==", "dev": true, "requires": { - "@lerna/batch-packages": "^3.1.2", - "@lerna/child-process": "^3.3.0", - "@lerna/command": "^3.3.0", - "@lerna/filter-options": "^3.3.2", - "@lerna/run-parallel-batches": "^3.0.0", - "@lerna/validation-error": "^3.0.0" + "@lerna/batch-packages": "3.10.6", + "@lerna/child-process": "3.3.0", + "@lerna/command": "3.10.6", + "@lerna/filter-options": "3.10.6", + "@lerna/run-parallel-batches": "3.0.0", + "@lerna/validation-error": "3.6.0" } }, "@lerna/filter-options": { - "version": "3.3.2", - "resolved": "https://registry.npmjs.org/@lerna/filter-options/-/filter-options-3.3.2.tgz", - "integrity": "sha512-0WHqdDgAnt5WKoByi1q+lFw8HWt5tEKP2DnLlGqWv3YFwVF5DsPRlO7xbzjY9sJgvyJtZcnkMtccdBPFhGGyIQ==", + "version": "3.10.6", + "resolved": "https://registry.npmjs.org/@lerna/filter-options/-/filter-options-3.10.6.tgz", + "integrity": 
"sha512-r/dQbqN+RGFKZNn+DyWehswFmAkny/fkdMB2sRM2YVe7zRTtSl95YxD9DtdYnpJTG/jbOVICS/L5QJakrI6SSw==", "dev": true, "requires": { - "@lerna/collect-updates": "^3.3.2", - "@lerna/filter-packages": "^3.0.0", + "@lerna/collect-updates": "3.10.1", + "@lerna/filter-packages": "3.10.0", "dedent": "^0.7.0" } }, "@lerna/filter-packages": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/@lerna/filter-packages/-/filter-packages-3.0.0.tgz", - "integrity": "sha512-zwbY1J4uRjWRZ/FgYbtVkq7I3Nduwsg2V2HwLKSzwV2vPglfGqgovYOVkND6/xqe2BHwDX4IyA2+e7OJmLaLSA==", + "version": "3.10.0", + "resolved": "https://registry.npmjs.org/@lerna/filter-packages/-/filter-packages-3.10.0.tgz", + "integrity": "sha512-3Acdj+jbany6LnQSuImU4ttcK5ULHSVug8Gh/EvwTewKCDpHAuoI3eyuzZOnSBdMvDOjE03uIESQK0dNNsn6Ow==", "dev": true, "requires": { - "@lerna/validation-error": "^3.0.0", - "multimatch": "^2.1.0", - "npmlog": "^4.1.2" + "@lerna/validation-error": "3.6.0", + "libnpm": "^2.0.1", + "multimatch": "^2.1.0" } }, "@lerna/get-npm-exec-opts": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/@lerna/get-npm-exec-opts/-/get-npm-exec-opts-3.0.0.tgz", - "integrity": "sha512-arcYUm+4xS8J3Palhl+5rRJXnZnFHsLFKHBxznkPIxjwGQeAEw7df38uHdVjEQ+HNeFmHnBgSqfbxl1VIw5DHg==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@lerna/get-npm-exec-opts/-/get-npm-exec-opts-3.6.0.tgz", + "integrity": "sha512-ruH6KuLlt75aCObXfUIdVJqmfVq7sgWGq5mXa05vc1MEqxTIiU23YiJdWzofQOOUOACaZkzZ4K4Nu7wXEg4Xgg==", + "dev": true, + "requires": { + "libnpm": "^2.0.1" + } + }, + "@lerna/get-packed": { + "version": "3.7.0", + "resolved": "https://registry.npmjs.org/@lerna/get-packed/-/get-packed-3.7.0.tgz", + "integrity": "sha512-yuFtjsUZIHjeIvIYQ/QuytC+FQcHwo3peB+yGBST2uWCLUCR5rx6knoQcPzbxdFDCuUb5IFccFGd3B1fHFg3RQ==", "dev": true, "requires": { - "npmlog": "^4.1.2" + "fs-extra": "^7.0.0", + "ssri": "^6.0.1", + "tar": "^4.4.8" + }, + "dependencies": { + "tar": { + "version": "4.4.8", + "resolved": "https://registry.npmjs.org/tar/-/tar-4.4.8.tgz", + "integrity": "sha512-LzHF64s5chPQQS0IYBn9IN5h3i98c12bo4NCO7e0sGM2llXQ3p2FGC5sdENN4cTW48O915Sh+x+EXx7XW96xYQ==", + "dev": true, + "requires": { + "chownr": "^1.1.1", + "fs-minipass": "^1.2.5", + "minipass": "^2.3.4", + "minizlib": "^1.1.1", + "mkdirp": "^0.5.0", + "safe-buffer": "^5.1.2", + "yallist": "^3.0.2" + } + }, + "yallist": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.0.3.tgz", + "integrity": "sha512-S+Zk8DEWE6oKpV+vI3qWkaK+jSbIK86pCwe2IF/xwIpQ8jEuxpw9NyaGjmp9+BoJv5FV2piqCDcoCtStppiq2A==", + "dev": true + } } }, "@lerna/global-options": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/@lerna/global-options/-/global-options-3.1.3.tgz", - "integrity": "sha512-LVeZU/Zgc0XkHdGMRYn+EmHfDmmYNwYRv3ta59iCVFXLVp7FRFWF7oB1ss/WRa9x/pYU0o6L8as/5DomLUGASA==", + "version": "3.10.6", + "resolved": "https://registry.npmjs.org/@lerna/global-options/-/global-options-3.10.6.tgz", + "integrity": "sha512-k5Xkq1M/uREFC2R9uwN5gcvIgjj4iOXo0YyeEXCMWBiW3j2GL9xN4d1MmAIcrYlAzVYh6kLlWaFWl/rNIneHIw==", "dev": true }, "@lerna/has-npm-version": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/@lerna/has-npm-version/-/has-npm-version-3.3.0.tgz", - "integrity": "sha512-GX7omRep1eBRZHgjZLRw3MpBJSdA5gPZFz95P7rxhpvsiG384Tdrr/cKFMhm0A09yq27Tk/nuYTaZIj7HsVE6g==", + "version": "3.10.0", + "resolved": "https://registry.npmjs.org/@lerna/has-npm-version/-/has-npm-version-3.10.0.tgz", + "integrity": 
"sha512-N4RRYxGeivuaKgPDzrhkQOQs1Sg4tOnxnEe3akfqu1wDA4Ng5V6Y2uW3DbkAjFL3aNJhWF5Vbf7sBsGtfgDQ8w==", "dev": true, "requires": { - "@lerna/child-process": "^3.3.0", + "@lerna/child-process": "3.3.0", "semver": "^5.5.0" } }, "@lerna/import": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/@lerna/import/-/import-3.3.1.tgz", - "integrity": "sha512-2OzTQDkYKbBPpyP2iOI1sWfcvMjNLjjHjmREq/uOWJaSIk5J3Ukt71OPpcOHh4V2CBOlXidCcO+Hyb4FVIy8fw==", + "version": "3.10.6", + "resolved": "https://registry.npmjs.org/@lerna/import/-/import-3.10.6.tgz", + "integrity": "sha512-LlGxhfDhovoNoBJLF3PYd3j/G2GFTnfLh0V38+hBQ6lomMNJbjkACfiLVomQxPWWpYLk0GTlpWYR8YGv6L7Ifw==", "dev": true, "requires": { - "@lerna/child-process": "^3.3.0", - "@lerna/command": "^3.3.0", - "@lerna/prompt": "^3.3.1", - "@lerna/validation-error": "^3.0.0", + "@lerna/child-process": "3.3.0", + "@lerna/command": "3.10.6", + "@lerna/prompt": "3.6.0", + "@lerna/pulse-till-done": "3.7.1", + "@lerna/validation-error": "3.6.0", "dedent": "^0.7.0", "fs-extra": "^7.0.0", "p-map-series": "^1.0.0" } }, "@lerna/init": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/@lerna/init/-/init-3.3.0.tgz", - "integrity": "sha512-HvgRLkIG6nDIeAO6ix5sUVIVV+W9UMk2rSSmFT66CDOefRi7S028amiyYnFUK1QkIAaUbVUyOnYaErtbJwICuw==", + "version": "3.10.6", + "resolved": "https://registry.npmjs.org/@lerna/init/-/init-3.10.6.tgz", + "integrity": "sha512-RIlEx+ofWLYRNjxCkkV3G0XQPM+/KA5RXRDb5wKQLYO1f+tZAaHoUh8fHDIvxGf/ohY/OIjYYGSsU+ysimfwiQ==", "dev": true, "requires": { - "@lerna/child-process": "^3.3.0", - "@lerna/command": "^3.3.0", + "@lerna/child-process": "3.3.0", + "@lerna/command": "3.10.6", "fs-extra": "^7.0.0", "p-map": "^1.2.0", "write-json-file": "^2.3.0" } }, "@lerna/link": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/@lerna/link/-/link-3.3.0.tgz", - "integrity": "sha512-8CeXzGL7okrsVXsy2sHXI2KuBaczw3cblAnA2+FJPUqSKMPNbUTRzeU3bOlCjYtK0LbxC4ngENJTL3jJ8RaYQQ==", + "version": "3.10.6", + "resolved": "https://registry.npmjs.org/@lerna/link/-/link-3.10.6.tgz", + "integrity": "sha512-dwD6qftRWitgLDYbqtDrgO7c8uF5C0fHVew5M6gU5m9tBJidqd7cDwHv/bXboLEI63U7tt5y6LY+wEpYUFsBRw==", "dev": true, "requires": { - "@lerna/command": "^3.3.0", - "@lerna/package-graph": "^3.1.2", - "@lerna/symlink-dependencies": "^3.3.0", + "@lerna/command": "3.10.6", + "@lerna/package-graph": "3.10.6", + "@lerna/symlink-dependencies": "3.10.0", "p-map": "^1.2.0", "slash": "^1.0.0" } }, "@lerna/list": { - "version": "3.3.2", - "resolved": "https://registry.npmjs.org/@lerna/list/-/list-3.3.2.tgz", - "integrity": "sha512-XXEVy7w+i/xx8NeJmGirw4upEoEF9OfD6XPLjISNQc24VgQV+frXdVJ02QcP7Y/PkY1rdIVrOjvo3ipKVLUxaQ==", + "version": "3.10.6", + "resolved": "https://registry.npmjs.org/@lerna/list/-/list-3.10.6.tgz", + "integrity": "sha512-3ElQBj2dOB4uUkpsjC1bxdeZwEzRBuV1pBBs5E1LncwsZf7D9D99Z32fuZsDaCHpEMgHAD4/j8juI3/7m5dkaQ==", "dev": true, "requires": { - "@lerna/command": "^3.3.0", - "@lerna/filter-options": "^3.3.2", - "@lerna/listable": "^3.0.0", - "@lerna/output": "^3.0.0" + "@lerna/command": "3.10.6", + "@lerna/filter-options": "3.10.6", + "@lerna/listable": "3.10.6", + "@lerna/output": "3.6.0" } }, "@lerna/listable": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/@lerna/listable/-/listable-3.0.0.tgz", - "integrity": "sha512-HX/9hyx1HLg2kpiKXIUc1EimlkK1T58aKQ7ovO7rQdTx9ForpefoMzyLnHE1n4XrUtEszcSWJIICJ/F898M6Ag==", + "version": "3.10.6", + "resolved": "https://registry.npmjs.org/@lerna/listable/-/listable-3.10.6.tgz", + "integrity": 
"sha512-F7ZuvesSgeuMiJf99eOum5p1MQGQStykcmHH1ek+LQRMiGGF1o3PkBxPvHTZBADGOFarek8bFA5TVmRAMX7NIw==", "dev": true, "requires": { + "@lerna/batch-packages": "3.10.6", "chalk": "^2.3.1", "columnify": "^1.5.4" } }, "@lerna/log-packed": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/@lerna/log-packed/-/log-packed-3.0.4.tgz", - "integrity": "sha512-vVQHgMagE2wnbxhNY9nFkdu+Cx2TsyWalkJfkxbNzmo6gOCrDsxCBDj9vTEV8Q+4aWx0C0Bsc0sB2Eb8y/+ofA==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@lerna/log-packed/-/log-packed-3.6.0.tgz", + "integrity": "sha512-T/J41zMkzpWB5nbiTRS5PmYTFn74mJXe6RQA2qhkdLi0UqnTp97Pux1loz3jsJf2yJtiQUnyMM7KuKIAge0Vlw==", "dev": true, "requires": { "byte-size": "^4.0.3", "columnify": "^1.5.4", "has-unicode": "^2.0.1", - "npmlog": "^4.1.2" + "libnpm": "^2.0.1" } }, "@lerna/npm-conf": { - "version": "3.4.1", - "resolved": "https://registry.npmjs.org/@lerna/npm-conf/-/npm-conf-3.4.1.tgz", - "integrity": "sha512-i9G6DnbCqiAqxKx2rSXej/n14qxlV/XOebL6QZonxJKzNTB+Q2wglnhTXmfZXTPJfoqimLaY4NfAEtbOXRWOXQ==", + "version": "3.7.0", + "resolved": "https://registry.npmjs.org/@lerna/npm-conf/-/npm-conf-3.7.0.tgz", + "integrity": "sha512-+WSMDfPKcKzMfqq283ydz9RRpOU6p9wfx0wy4hVSUY/6YUpsyuk8SShjcRtY8zTM5AOrxvFBuuV90H4YpZ5+Ng==", "dev": true, "requires": { "config-chain": "^1.1.11", @@ -755,125 +844,180 @@ } }, "@lerna/npm-dist-tag": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/@lerna/npm-dist-tag/-/npm-dist-tag-3.3.0.tgz", - "integrity": "sha512-EtZJXzh3w5tqXEev+EBBPrWKWWn0WgJfxm4FihfS9VgyaAW8udIVZHGkIQ3f+tBtupcAzA9Q8cQNUkGF2efwmA==", + "version": "3.8.5", + "resolved": "https://registry.npmjs.org/@lerna/npm-dist-tag/-/npm-dist-tag-3.8.5.tgz", + "integrity": "sha512-VO57yKTB4NC2LZuTd4w0LmlRpoFm/gejQ1gqqLGzSJuSZaBXmieElFovzl21S07cqiy7FNVdz75x7/a6WCZ6XA==", "dev": true, "requires": { - "@lerna/child-process": "^3.3.0", - "@lerna/get-npm-exec-opts": "^3.0.0", - "npmlog": "^4.1.2" + "figgy-pudding": "^3.5.1", + "libnpm": "^2.0.1" } }, "@lerna/npm-install": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/@lerna/npm-install/-/npm-install-3.3.0.tgz", - "integrity": "sha512-WoVvKdS8ltROTGSNQwo6NDq0YKnjwhvTG4li1okcN/eHKOS3tL9bxbgPx7No0wOq5DKBpdeS9KhAfee6LFAZ5g==", + "version": "3.10.0", + "resolved": "https://registry.npmjs.org/@lerna/npm-install/-/npm-install-3.10.0.tgz", + "integrity": "sha512-/6/XyLY9/4jaMPBOVYUr4wZxQURIfwoELY0qCQ8gZ5zv4cOiFiiCUxZ0i4fxqFtD7nJ084zq1DsZW0aH0CIWYw==", "dev": true, "requires": { - "@lerna/child-process": "^3.3.0", - "@lerna/get-npm-exec-opts": "^3.0.0", + "@lerna/child-process": "3.3.0", + "@lerna/get-npm-exec-opts": "3.6.0", "fs-extra": "^7.0.0", - "npm-package-arg": "^6.0.0", - "npmlog": "^4.1.2", + "libnpm": "^2.0.1", "signal-exit": "^3.0.2", "write-pkg": "^3.1.0" } }, "@lerna/npm-publish": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/@lerna/npm-publish/-/npm-publish-3.3.1.tgz", - "integrity": "sha512-bVTlWIcBL6Zpyzqvr9C7rxXYcoPw+l7IPz5eqQDNREj1R39Wj18OWB2KTJq8l7LIX7Wf4C2A1uT5hJaEf9BuvA==", + "version": "3.10.7", + "resolved": "https://registry.npmjs.org/@lerna/npm-publish/-/npm-publish-3.10.7.tgz", + "integrity": "sha512-oU3/Q+eHC1fRjh7bk6Nn4tRD1OLR6XZVs3v+UWMWMrF4hVSV61pxcP5tpeI1n4gDQjSgh7seI4EzKVJe/WfraA==", "dev": true, "requires": { - "@lerna/child-process": "^3.3.0", - "@lerna/get-npm-exec-opts": "^3.0.0", - "@lerna/has-npm-version": "^3.3.0", - "@lerna/log-packed": "^3.0.4", + "@lerna/run-lifecycle": "3.10.5", + "figgy-pudding": "^3.5.1", "fs-extra": "^7.0.0", - "npmlog": 
"^4.1.2", - "p-map": "^1.2.0" + "libnpm": "^2.0.1" } }, "@lerna/npm-run-script": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/@lerna/npm-run-script/-/npm-run-script-3.3.0.tgz", - "integrity": "sha512-YqDguWZzp4jIomaE4aWMUP7MIAJAFvRAf6ziQLpqwoQskfWLqK5mW0CcszT1oLjhfb3cY3MMfSTFaqwbdKmICg==", + "version": "3.10.0", + "resolved": "https://registry.npmjs.org/@lerna/npm-run-script/-/npm-run-script-3.10.0.tgz", + "integrity": "sha512-c21tBXLF1Wje4tx/Td9jKIMrlZo/8QQiyyadjdKpwyyo7orSMsVNXGyJwvZ4JVVDcwC3GPU6HQvkt63v7rcyaw==", "dev": true, "requires": { - "@lerna/child-process": "^3.3.0", - "@lerna/get-npm-exec-opts": "^3.0.0", - "npmlog": "^4.1.2" + "@lerna/child-process": "3.3.0", + "@lerna/get-npm-exec-opts": "3.6.0", + "libnpm": "^2.0.1" } }, "@lerna/output": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/@lerna/output/-/output-3.0.0.tgz", - "integrity": "sha512-EFxnSbO0zDEVKkTKpoCUAFcZjc3gn3DwPlyTDxbeqPU7neCfxP4rA4+0a6pcOfTlRS5kLBRMx79F2TRCaMM3DA==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@lerna/output/-/output-3.6.0.tgz", + "integrity": "sha512-9sjQouf6p7VQtVCRnzoTGlZyURd48i3ha3WBHC/UBJnHZFuXMqWVPKNuvnMf2kRXDyoQD+2mNywpmEJg5jOnRg==", + "dev": true, + "requires": { + "libnpm": "^2.0.1" + } + }, + "@lerna/pack-directory": { + "version": "3.10.5", + "resolved": "https://registry.npmjs.org/@lerna/pack-directory/-/pack-directory-3.10.5.tgz", + "integrity": "sha512-Ulj24L9XdgjJIxBr6ZjRJEoBULVH3c10lqunUdW41bswXhzhirRtQIxv0+5shngNjDwgMmJfOBcuCVKPSez4tg==", "dev": true, "requires": { - "npmlog": "^4.1.2" + "@lerna/get-packed": "3.7.0", + "@lerna/package": "3.7.2", + "@lerna/run-lifecycle": "3.10.5", + "figgy-pudding": "^3.5.1", + "libnpm": "^2.0.1", + "npm-packlist": "^1.1.12", + "tar": "^4.4.8", + "temp-write": "^3.4.0" + }, + "dependencies": { + "tar": { + "version": "4.4.8", + "resolved": "https://registry.npmjs.org/tar/-/tar-4.4.8.tgz", + "integrity": "sha512-LzHF64s5chPQQS0IYBn9IN5h3i98c12bo4NCO7e0sGM2llXQ3p2FGC5sdENN4cTW48O915Sh+x+EXx7XW96xYQ==", + "dev": true, + "requires": { + "chownr": "^1.1.1", + "fs-minipass": "^1.2.5", + "minipass": "^2.3.4", + "minizlib": "^1.1.1", + "mkdirp": "^0.5.0", + "safe-buffer": "^5.1.2", + "yallist": "^3.0.2" + } + }, + "yallist": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.0.3.tgz", + "integrity": "sha512-S+Zk8DEWE6oKpV+vI3qWkaK+jSbIK86pCwe2IF/xwIpQ8jEuxpw9NyaGjmp9+BoJv5FV2piqCDcoCtStppiq2A==", + "dev": true + } } }, "@lerna/package": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/@lerna/package/-/package-3.0.0.tgz", - "integrity": "sha512-djzEJxzn212wS8d9znBnlXkeRlPL7GqeAYBykAmsuq51YGvaQK67Umh5ejdO0uxexF/4r7yRwgrlRHpQs8Rfqg==", + "version": "3.7.2", + "resolved": "https://registry.npmjs.org/@lerna/package/-/package-3.7.2.tgz", + "integrity": "sha512-8A5hN2CekM1a0Ix4VUO/g+REo+MsnXb8lnQ0bGjr1YGWzSL5NxYJ0Z9+0pwTfDpvRDYlFYO0rMVwBUW44b4dUw==", "dev": true, "requires": { - "npm-package-arg": "^6.0.0", + "libnpm": "^2.0.1", + "load-json-file": "^4.0.0", "write-pkg": "^3.1.0" + }, + "dependencies": { + "load-json-file": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/load-json-file/-/load-json-file-4.0.0.tgz", + "integrity": "sha1-L19Fq5HjMhYjT9U62rZo607AmTs=", + "dev": true, + "requires": { + "graceful-fs": "^4.1.2", + "parse-json": "^4.0.0", + "pify": "^3.0.0", + "strip-bom": "^3.0.0" + } + }, + "parse-json": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-4.0.0.tgz", + "integrity": 
"sha1-vjX1Qlvh9/bHRxhPmKeIy5lHfuA=", + "dev": true, + "requires": { + "error-ex": "^1.3.1", + "json-parse-better-errors": "^1.0.1" + } + }, + "strip-bom": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-3.0.0.tgz", + "integrity": "sha1-IzTBjpx1n3vdVv3vfprj1YjmjtM=", + "dev": true + } } }, "@lerna/package-graph": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/@lerna/package-graph/-/package-graph-3.1.2.tgz", - "integrity": "sha512-9wIWb49I1IJmyjPdEVZQ13IAi9biGfH/OZHOC04U2zXGA0GLiY+B3CAx6FQvqkZ8xEGfqzmXnv3LvZ0bQfc1aQ==", + "version": "3.10.6", + "resolved": "https://registry.npmjs.org/@lerna/package-graph/-/package-graph-3.10.6.tgz", + "integrity": "sha512-mpIOJbhi+xLqT9BcUrLVD4We8WUdousQf/QndbEWl8DWAW1ethtRHVsCm9ufdBB3F9nj4PH/hqnDWWwqE+rS4w==", "dev": true, "requires": { - "@lerna/validation-error": "^3.0.0", - "npm-package-arg": "^6.0.0", + "@lerna/validation-error": "3.6.0", + "libnpm": "^2.0.1", "semver": "^5.5.0" } }, "@lerna/project": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/@lerna/project/-/project-3.0.0.tgz", - "integrity": "sha512-XhDFVfqj79jG2Speggd15RpYaE8uiR25UKcQBDmumbmqvTS7xf2cvl2pq2UTvDafaJ0YwFF3xkxQZeZnFMwdkw==", + "version": "3.10.0", + "resolved": "https://registry.npmjs.org/@lerna/project/-/project-3.10.0.tgz", + "integrity": "sha512-9QRl8aGHuyU4zVEELQmNPnJTlS7XHqX7w9I9isCXdnilKc2R0MyvUs21lj6Yyt6xTuQnqD158TR9tbS4QufYQQ==", "dev": true, "requires": { - "@lerna/package": "^3.0.0", - "@lerna/validation-error": "^3.0.0", + "@lerna/package": "3.7.2", + "@lerna/validation-error": "3.6.0", "cosmiconfig": "^5.0.2", "dedent": "^0.7.0", "dot-prop": "^4.2.0", "glob-parent": "^3.1.0", "globby": "^8.0.1", + "libnpm": "^2.0.1", "load-json-file": "^4.0.0", - "npmlog": "^4.1.2", "p-map": "^1.2.0", "resolve-from": "^4.0.0", "write-json-file": "^2.3.0" }, "dependencies": { - "glob-parent": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-3.1.0.tgz", - "integrity": "sha1-nmr2KZ2NO9K9QEMIMr0RPfkGxa4=", - "dev": true, - "requires": { - "is-glob": "^3.1.0", - "path-dirname": "^1.0.0" - } - }, "globby": { - "version": "8.0.1", - "resolved": "https://registry.npmjs.org/globby/-/globby-8.0.1.tgz", - "integrity": "sha512-oMrYrJERnKBLXNLVTqhm3vPEdJ/b2ZE28xN4YARiix1NOIOBPEpOUnm844K1iu/BkphCaf2WNFwMszv8Soi1pw==", + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/globby/-/globby-8.0.2.tgz", + "integrity": "sha512-yTzMmKygLp8RUpG1Ymu2VXPSJQZjNAZPD4ywgYEaG7e4tBJeUQBO8OpXrf1RCNcEs5alsoJYPAMiIHP0cmeC7w==", "dev": true, "requires": { "array-union": "^1.0.1", - "dir-glob": "^2.0.0", + "dir-glob": "2.0.0", "fast-glob": "^2.0.2", "glob": "^7.1.2", "ignore": "^3.3.5", @@ -881,21 +1025,6 @@ "slash": "^1.0.0" } }, - "is-extglob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", - "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", - "dev": true - }, - "is-glob": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-3.1.0.tgz", - "integrity": "sha1-e6WuJCF4BKxwcHuWkiVnSGzD6Eo=", - "dev": true, - "requires": { - "is-extglob": "^2.1.0" - } - }, "load-json-file": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/load-json-file/-/load-json-file-4.0.0.tgz", @@ -933,42 +1062,42 @@ } }, "@lerna/prompt": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/@lerna/prompt/-/prompt-3.3.1.tgz", - "integrity": 
"sha512-eJhofrUCUaItMIH6et8kI7YqHfhjWqGZoTsE+40NRCfAraOMWx+pDzfRfeoAl3qeRAH2HhNj1bkYn70FbUOxuQ==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@lerna/prompt/-/prompt-3.6.0.tgz", + "integrity": "sha512-nyAjPMolJ/ZRAAVcXrUH89C4n1SiWvLh4xWNvWYKLcf3PI5yges35sDFP/HYrM4+cEbkNFuJCRq6CxaET4PRsg==", "dev": true, "requires": { "inquirer": "^6.2.0", - "npmlog": "^4.1.2" + "libnpm": "^2.0.1" } }, "@lerna/publish": { - "version": "3.4.3", - "resolved": "https://registry.npmjs.org/@lerna/publish/-/publish-3.4.3.tgz", - "integrity": "sha512-baeRL8xmOR25p86cAaS9mL0jdRzdv4dUo04PlK2Wes+YlL705F55cSXeC9npNie+9rGwFyLzCTQe18WdbZyLuw==", - "dev": true, - "requires": { - "@lerna/batch-packages": "^3.1.2", - "@lerna/check-working-tree": "^3.3.0", - "@lerna/child-process": "^3.3.0", - "@lerna/collect-updates": "^3.3.2", - "@lerna/command": "^3.3.0", - "@lerna/describe-ref": "^3.3.0", - "@lerna/get-npm-exec-opts": "^3.0.0", - "@lerna/npm-conf": "^3.4.1", - "@lerna/npm-dist-tag": "^3.3.0", - "@lerna/npm-publish": "^3.3.1", - "@lerna/output": "^3.0.0", - "@lerna/prompt": "^3.3.1", - "@lerna/run-lifecycle": "^3.4.1", - "@lerna/run-parallel-batches": "^3.0.0", - "@lerna/validation-error": "^3.0.0", - "@lerna/version": "^3.4.1", + "version": "3.10.7", + "resolved": "https://registry.npmjs.org/@lerna/publish/-/publish-3.10.7.tgz", + "integrity": "sha512-Qd8pml2l9s6GIvNX1pTnia+Ddjsm9LF3pRRoOQeugAdv2IJNf45c/83AAEyE9M2ShG5VjgxEITNW4Lg49zipjQ==", + "dev": true, + "requires": { + "@lerna/batch-packages": "3.10.6", + "@lerna/check-working-tree": "3.10.0", + "@lerna/child-process": "3.3.0", + "@lerna/collect-updates": "3.10.1", + "@lerna/command": "3.10.6", + "@lerna/describe-ref": "3.10.0", + "@lerna/log-packed": "3.6.0", + "@lerna/npm-conf": "3.7.0", + "@lerna/npm-dist-tag": "3.8.5", + "@lerna/npm-publish": "3.10.7", + "@lerna/output": "3.6.0", + "@lerna/pack-directory": "3.10.5", + "@lerna/prompt": "3.6.0", + "@lerna/pulse-till-done": "3.7.1", + "@lerna/run-lifecycle": "3.10.5", + "@lerna/run-parallel-batches": "3.0.0", + "@lerna/validation-error": "3.6.0", + "@lerna/version": "3.10.6", + "figgy-pudding": "^3.5.1", "fs-extra": "^7.0.0", - "libnpmaccess": "^3.0.0", - "npm-package-arg": "^6.0.0", - "npm-registry-fetch": "^3.8.0", - "npmlog": "^4.1.2", + "libnpm": "^2.0.1", "p-finally": "^1.0.0", "p-map": "^1.2.0", "p-pipe": "^1.2.0", @@ -976,54 +1105,72 @@ "semver": "^5.5.0" } }, + "@lerna/pulse-till-done": { + "version": "3.7.1", + "resolved": "https://registry.npmjs.org/@lerna/pulse-till-done/-/pulse-till-done-3.7.1.tgz", + "integrity": "sha512-MzpesZeW3Mc+CiAq4zUt9qTXI9uEBBKrubYHE36voQTSkHvu/Rox6YOvfUr+U7P6k8frFPeCgGpfMDTLhiqe6w==", + "dev": true, + "requires": { + "libnpm": "^2.0.1" + } + }, "@lerna/resolve-symlink": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/@lerna/resolve-symlink/-/resolve-symlink-3.3.0.tgz", - "integrity": "sha512-KmoPDcFJ2aOK2inYHbrsiO9SodedUj0L1JDvDgirVNIjMUaQe2Q6Vi4Gh+VCJcyB27JtfHioV9R2NxU72Pk2hg==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@lerna/resolve-symlink/-/resolve-symlink-3.6.0.tgz", + "integrity": "sha512-TVOAEqHJSQVhNDMFCwEUZPaOETqHDQV1TQWQfC8ZlOqyaUQ7veZUbg0yfG7RPNzlSpvF0ZaGFeR0YhYDAW03GA==", "dev": true, "requires": { "fs-extra": "^7.0.0", - "npmlog": "^4.1.2", + "libnpm": "^2.0.1", "read-cmd-shim": "^1.0.1" } }, "@lerna/rimraf-dir": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/@lerna/rimraf-dir/-/rimraf-dir-3.3.0.tgz", - "integrity": 
"sha512-vSqOcZ4kZduiSprbt+y40qziyN3VKYh+ygiCdnbBbsaxpdKB6CfrSMUtrLhVFrqUfBHIZRzHIzgjTdtQex1KLw==", + "version": "3.10.0", + "resolved": "https://registry.npmjs.org/@lerna/rimraf-dir/-/rimraf-dir-3.10.0.tgz", + "integrity": "sha512-RSKSfxPURc58ERCD/PuzorR86lWEvIWNclXYGvIYM76yNGrWiDF44pGHQvB4J+Lxa5M+52ZtZC/eOC7A7YCH4g==", "dev": true, "requires": { - "@lerna/child-process": "^3.3.0", - "npmlog": "^4.1.2", + "@lerna/child-process": "3.3.0", + "libnpm": "^2.0.1", "path-exists": "^3.0.0", "rimraf": "^2.6.2" + }, + "dependencies": { + "path-exists": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-3.0.0.tgz", + "integrity": "sha1-zg6+ql94yxiSXqfYENe1mwEP1RU=", + "dev": true + } } }, "@lerna/run": { - "version": "3.3.2", - "resolved": "https://registry.npmjs.org/@lerna/run/-/run-3.3.2.tgz", - "integrity": "sha512-cruwRGZZWnQ5I0M+AqcoT3Xpq2wj3135iVw4n59/Op6dZu50sMFXZNLiTTTZ15k8rTKjydcccJMdPSpTHbH7/A==", - "dev": true, - "requires": { - "@lerna/batch-packages": "^3.1.2", - "@lerna/command": "^3.3.0", - "@lerna/filter-options": "^3.3.2", - "@lerna/npm-run-script": "^3.3.0", - "@lerna/output": "^3.0.0", - "@lerna/run-parallel-batches": "^3.0.0", - "@lerna/validation-error": "^3.0.0", + "version": "3.10.6", + "resolved": "https://registry.npmjs.org/@lerna/run/-/run-3.10.6.tgz", + "integrity": "sha512-KS2lWbu/8WUUscQPi9U8sPO6yYpzf/0GmODjpruR1nRi1u/tuncdjTiG+hjGAeFC1BD7YktT9Za6imIpE8RXmA==", + "dev": true, + "requires": { + "@lerna/batch-packages": "3.10.6", + "@lerna/command": "3.10.6", + "@lerna/filter-options": "3.10.6", + "@lerna/npm-run-script": "3.10.0", + "@lerna/output": "3.6.0", + "@lerna/run-parallel-batches": "3.0.0", + "@lerna/timer": "3.5.0", + "@lerna/validation-error": "3.6.0", "p-map": "^1.2.0" } }, "@lerna/run-lifecycle": { - "version": "3.4.1", - "resolved": "https://registry.npmjs.org/@lerna/run-lifecycle/-/run-lifecycle-3.4.1.tgz", - "integrity": "sha512-N/hi2srM9A4BWEkXccP7vCEbf4MmIuALF00DTBMvc0A/ccItwUpl3XNuM7+ADDRK0mkwE3hDw89lJ3A7f8oUQw==", + "version": "3.10.5", + "resolved": "https://registry.npmjs.org/@lerna/run-lifecycle/-/run-lifecycle-3.10.5.tgz", + "integrity": "sha512-YPmXviaxVlhcKM6IkDTIpTq24mxOuMCilo+MTr1RLoafgB9ZTmP2AHRiFt/sy14wOsq2Zqr0wJyj8KFlDYLTkA==", "dev": true, "requires": { - "@lerna/npm-conf": "^3.4.1", - "npm-lifecycle": "^2.0.0", - "npmlog": "^4.1.2" + "@lerna/npm-conf": "3.7.0", + "figgy-pudding": "^3.5.1", + "libnpm": "^2.0.1" } }, "@lerna/run-parallel-batches": { @@ -1037,112 +1184,67 @@ } }, "@lerna/symlink-binary": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/@lerna/symlink-binary/-/symlink-binary-3.3.0.tgz", - "integrity": "sha512-zRo6CimhvH/VJqCFl9T4IC6syjpWyQIxEfO2sBhrapEcfwjtwbhoGgKwucsvt4rIpFazCw63jQ/AXMT27KUIHg==", + "version": "3.10.0", + "resolved": "https://registry.npmjs.org/@lerna/symlink-binary/-/symlink-binary-3.10.0.tgz", + "integrity": "sha512-6mQsG+iVjBo8cD8s24O+YgFrwDyUGfUQbK4ryalAXFHI817Zd4xlI3tjg3W99whCt6rt6D0s1fpf8eslMN6dSw==", + "dev": true, + "requires": { + "@lerna/create-symlink": "3.6.0", + "@lerna/package": "3.7.2", + "fs-extra": "^7.0.0", + "p-map": "^1.2.0" + } + }, + "@lerna/symlink-dependencies": { + "version": "3.10.0", + "resolved": "https://registry.npmjs.org/@lerna/symlink-dependencies/-/symlink-dependencies-3.10.0.tgz", + "integrity": "sha512-vGpg5ydwGgQCuWNX5y7CRL38mGpuLhf1GRq9wMm7IGwnctEsdSNqvvE+LDgqtwEZASu5+vffYUkL0VlFXl8uWA==", "dev": true, "requires": { - "@lerna/create-symlink": "^3.3.0", - "@lerna/package": "^3.0.0", + "@lerna/create-symlink": 
"3.6.0", + "@lerna/resolve-symlink": "3.6.0", + "@lerna/symlink-binary": "3.10.0", "fs-extra": "^7.0.0", - "p-map": "^1.2.0", - "read-pkg": "^3.0.0" - }, - "dependencies": { - "load-json-file": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/load-json-file/-/load-json-file-4.0.0.tgz", - "integrity": "sha1-L19Fq5HjMhYjT9U62rZo607AmTs=", - "dev": true, - "requires": { - "graceful-fs": "^4.1.2", - "parse-json": "^4.0.0", - "pify": "^3.0.0", - "strip-bom": "^3.0.0" - } - }, - "parse-json": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-4.0.0.tgz", - "integrity": "sha1-vjX1Qlvh9/bHRxhPmKeIy5lHfuA=", - "dev": true, - "requires": { - "error-ex": "^1.3.1", - "json-parse-better-errors": "^1.0.1" - } - }, - "path-type": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/path-type/-/path-type-3.0.0.tgz", - "integrity": "sha512-T2ZUsdZFHgA3u4e5PfPbjd7HDDpxPnQb5jN0SrDsjNSuVXHJqtwTnWqG0B1jZrgmJ/7lj1EmVIByWt1gxGkWvg==", - "dev": true, - "requires": { - "pify": "^3.0.0" - } - }, - "read-pkg": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/read-pkg/-/read-pkg-3.0.0.tgz", - "integrity": "sha1-nLxoaXj+5l0WwA4rGcI3/Pbjg4k=", - "dev": true, - "requires": { - "load-json-file": "^4.0.0", - "normalize-package-data": "^2.3.2", - "path-type": "^3.0.0" - } - }, - "strip-bom": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-3.0.0.tgz", - "integrity": "sha1-IzTBjpx1n3vdVv3vfprj1YjmjtM=", - "dev": true - } - } - }, - "@lerna/symlink-dependencies": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/@lerna/symlink-dependencies/-/symlink-dependencies-3.3.0.tgz", - "integrity": "sha512-IRngSNCmuD5uBKVv23tHMvr7Mplti0lKHilFKcvhbvhAfu6m/Vclxhkfs/uLyHzG+DeRpl/9o86SQET3h4XDhg==", - "dev": true, - "requires": { - "@lerna/create-symlink": "^3.3.0", - "@lerna/resolve-symlink": "^3.3.0", - "@lerna/symlink-binary": "^3.3.0", - "fs-extra": "^7.0.0", - "p-finally": "^1.0.0", + "p-finally": "^1.0.0", "p-map": "^1.2.0", "p-map-series": "^1.0.0" } }, + "@lerna/timer": { + "version": "3.5.0", + "resolved": "https://registry.npmjs.org/@lerna/timer/-/timer-3.5.0.tgz", + "integrity": "sha512-TAb99hqQN6E3JBGtG9iyZNPq1/DbmqgBOeNrKtdJsGvIeX/NGLgUDWMrj2h04V4O+jpBFmSf6HIld6triKmxCA==", + "dev": true + }, "@lerna/validation-error": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/@lerna/validation-error/-/validation-error-3.0.0.tgz", - "integrity": "sha512-5wjkd2PszV0kWvH+EOKZJWlHEqCTTKrWsvfHnHhcUaKBe/NagPZFWs+0xlsDPZ3DJt5FNfbAPAnEBQ05zLirFA==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@lerna/validation-error/-/validation-error-3.6.0.tgz", + "integrity": "sha512-MWltncGO5VgMS0QedTlZCjFUMF/evRjDMMHrtVorkIB2Cp5xy0rkKa8iDBG43qpUWeG1giwi58yUlETBcWfILw==", "dev": true, "requires": { - "npmlog": "^4.1.2" + "libnpm": "^2.0.1" } }, "@lerna/version": { - "version": "3.4.1", - "resolved": "https://registry.npmjs.org/@lerna/version/-/version-3.4.1.tgz", - "integrity": "sha512-oefNaQLBJSI2WLZXw5XxDXk4NyF5/ct0V9ys/J308NpgZthPgwRPjk9ZR0o1IOxW1ABi6z3E317W/dxHDjvAkg==", - "dev": true, - "requires": { - "@lerna/batch-packages": "^3.1.2", - "@lerna/check-working-tree": "^3.3.0", - "@lerna/child-process": "^3.3.0", - "@lerna/collect-updates": "^3.3.2", - "@lerna/command": "^3.3.0", - "@lerna/conventional-commits": "^3.4.1", - "@lerna/output": "^3.0.0", - "@lerna/prompt": "^3.3.1", - "@lerna/run-lifecycle": "^3.4.1", - "@lerna/validation-error": "^3.0.0", + "version": "3.10.6", + 
"resolved": "https://registry.npmjs.org/@lerna/version/-/version-3.10.6.tgz", + "integrity": "sha512-77peW2ROlHHl1e/tHBUmhpb8tsO6CIdlx34XapZhUuIVykrkOuqVFFxqMecrGG8SJe0e3l1G+Fah7bJTQcG0kw==", + "dev": true, + "requires": { + "@lerna/batch-packages": "3.10.6", + "@lerna/check-working-tree": "3.10.0", + "@lerna/child-process": "3.3.0", + "@lerna/collect-updates": "3.10.1", + "@lerna/command": "3.10.6", + "@lerna/conventional-commits": "3.10.0", + "@lerna/output": "3.6.0", + "@lerna/prompt": "3.6.0", + "@lerna/run-lifecycle": "3.10.5", + "@lerna/validation-error": "3.6.0", "chalk": "^2.3.1", "dedent": "^0.7.0", + "libnpm": "^2.0.1", "minimatch": "^3.0.4", - "npmlog": "^4.1.2", "p-map": "^1.2.0", "p-pipe": "^1.2.0", "p-reduce": "^1.0.0", @@ -1153,15 +1255,24 @@ } }, "@lerna/write-log-file": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/@lerna/write-log-file/-/write-log-file-3.0.0.tgz", - "integrity": "sha512-SfbPp29lMeEVOb/M16lJwn4nnx5y+TwCdd7Uom9umd7KcZP0NOvpnX0PHehdonl7TyHZ1Xx2maklYuCLbQrd/A==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@lerna/write-log-file/-/write-log-file-3.6.0.tgz", + "integrity": "sha512-OkLK99V6sYXsJsYg+O9wtiFS3z6eUPaiz2e6cXJt80mfIIdI1t2dnmyua0Ib5cZWExQvx2z6Y32Wlf0MnsoNsA==", "dev": true, "requires": { - "npmlog": "^4.1.2", + "libnpm": "^2.0.1", "write-file-atomic": "^2.3.0" } }, + "@mattiasbuelens/web-streams-polyfill": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/@mattiasbuelens/web-streams-polyfill/-/web-streams-polyfill-0.2.1.tgz", + "integrity": "sha512-oKuFCQFa3W7Hj7zKn0+4ypI8JFm4ZKIoncwAC6wd5WwFW2sL7O1hpPoJdSWpynQ4DJ4lQ6MvFoVDmCLilonDFg==", + "dev": true, + "requires": { + "@types/whatwg-streams": "^0.0.7" + } + }, "@mrmlnc/readdir-enhanced": { "version": "2.2.1", "resolved": "https://registry.npmjs.org/@mrmlnc/readdir-enhanced/-/readdir-enhanced-2.2.1.tgz", @@ -1173,20 +1284,11 @@ } }, "@nodelib/fs.stat": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-1.1.2.tgz", - "integrity": "sha512-yprFYuno9FtNsSHVlSWd+nRlmGoAbqbeCwOryP6sC/zoCjhpArcRMYp19EvpSUSizJAlsXEwJv+wcWS9XaXdMw==", + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-1.1.3.tgz", + "integrity": "sha512-shAmDyaQC4H92APFoIaVDHCx5bStIocgvbwQyxPRrbUY20V1EYTbSDchWbuwlMG3V17cprZhA6+78JfB+3DTPw==", "dev": true }, - "@samverschueren/stream-to-observable": { - "version": "0.3.0", - "resolved": "https://registry.npmjs.org/@samverschueren/stream-to-observable/-/stream-to-observable-0.3.0.tgz", - "integrity": "sha512-MI4Xx6LHs4Webyvi6EbspgyAb4D2Q2VtnCQ1blOJcoLS6mVa8lNN2rkIy1CVxfTUpoyIbCTkXES1rLXztFD1lg==", - "dev": true, - "requires": { - "any-observable": "^0.3.0" - } - }, "@sindresorhus/df": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/@sindresorhus/df/-/df-2.1.0.tgz", @@ -1226,22 +1328,16 @@ } } }, - "@std/esm": { - "version": "0.26.0", - "resolved": "https://registry.npmjs.org/@std/esm/-/esm-0.26.0.tgz", - "integrity": "sha512-g3RDuosSa5fZOzENtrZdx7Gevb3zabfn8qglug2aCJIVz/4woFpKoqm1yD3mG2RD0zJEZRnkkuPHsmNglKGl7g==", - "dev": true - }, "@types/events": { - "version": "1.2.0", - "resolved": "http://registry.npmjs.org/@types/events/-/events-1.2.0.tgz", - "integrity": "sha512-KEIlhXnIutzKwRbQkGWb/I4HFqBuUykAdHgDED6xqwXJfONCjF5VoE0cXEiurh3XauygxzeDzgtXUqvLkxFzzA==", + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/@types/events/-/events-3.0.0.tgz", + "integrity": 
"sha512-EaObqwIvayI5a8dCzhFrjKzVwKLxjoG9T6Ppd5CEo07LRKfQ8Yokw54r5+Wq7FaBQ+yXRvQAYPrHwya1/UFt9g==", "dev": true }, "@types/flatbuffers": { - "version": "1.9.0", - "resolved": "https://registry.npmjs.org/@types/flatbuffers/-/flatbuffers-1.9.0.tgz", - "integrity": "sha512-Ek+pJNTxBXBalTaTqKNwsaF3G8kfsmtYHxdWb8EUAS7dcPkSbRcNFGlaBQboXVSfSU/Vu32J3qs5Tgj56szDTw==" + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/@types/flatbuffers/-/flatbuffers-1.9.1.tgz", + "integrity": "sha512-TC3X0Nkj5wgvuY217VkodBtjbD3Yr0JNApDY1GW9IU5Mzm5ie1IJErqe4vRm+wy08IRz3bemaDATrdEw1CJlVQ==" }, "@types/fs-extra": { "version": "5.0.4", @@ -1264,9 +1360,9 @@ } }, "@types/handlebars": { - "version": "4.0.39", - "resolved": "https://registry.npmjs.org/@types/handlebars/-/handlebars-4.0.39.tgz", - "integrity": "sha512-vjaS7Q0dVqFp85QhyPSZqDKnTTCemcSHNHFvDdalO1s0Ifz5KuE64jQD5xoUkfdWwF4WpqdJEl7LsWH8rzhKJA==", + "version": "4.0.40", + "resolved": "https://registry.npmjs.org/@types/handlebars/-/handlebars-4.0.40.tgz", + "integrity": "sha512-sGWNtsjNrLOdKha2RV1UeF8+UbQnPSG7qbe5wwbni0mw4h2gHXyPFUMOC+xwGirIiiydM/HSqjDO4rk6NFB18w==", "dev": true }, "@types/highlight.js": { @@ -1276,15 +1372,15 @@ "dev": true }, "@types/jest": { - "version": "23.3.5", - "resolved": "https://registry.npmjs.org/@types/jest/-/jest-23.3.5.tgz", - "integrity": "sha512-3LI+vUC3Wju28vbjIjsTKakhMB8HC4l+tMz+Z8WRzVK+kmvezE5jcOvKtBpznWSI5KDLFo+FouUhpTKoekadCA==", + "version": "23.3.13", + "resolved": "https://registry.npmjs.org/@types/jest/-/jest-23.3.13.tgz", + "integrity": "sha512-ePl4l+7dLLmCucIwgQHAgjiepY++qcI6nb8eAwGNkB6OxmTe3Z9rQU3rSpomqu42PCCnlThZbOoxsf+qylJsLA==", "dev": true }, "@types/lodash": { - "version": "4.14.117", - "resolved": "https://registry.npmjs.org/@types/lodash/-/lodash-4.14.117.tgz", - "integrity": "sha512-xyf2m6tRbz8qQKcxYZa7PA4SllYcay+eh25DN3jmNYY6gSTL7Htc/bttVdkqj2wfJGbeWlQiX8pIyJpKU+tubw==", + "version": "4.14.120", + "resolved": "https://registry.npmjs.org/@types/lodash/-/lodash-4.14.120.tgz", + "integrity": "sha512-jQ21kQ120mo+IrDs1nFNVm/AsdFxIx2+vZ347DbogHJPd/JzKNMOqU6HCYin1W6v8l5R9XSO2/e9cxmn7HAnVw==", "dev": true }, "@types/marked": { @@ -1300,14 +1396,14 @@ "dev": true }, "@types/node": { - "version": "10.12.0", - "resolved": "https://registry.npmjs.org/@types/node/-/node-10.12.0.tgz", - "integrity": "sha512-3TUHC3jsBAB7qVRGxT6lWyYo2v96BMmD2PTcl47H25Lu7UXtFH/2qqmKiVrnel6Ne//0TFYf6uvNX+HW2FRkLQ==" + "version": "10.12.18", + "resolved": "https://registry.npmjs.org/@types/node/-/node-10.12.18.tgz", + "integrity": "sha512-fh+pAqt4xRzPfqA6eh3Z2y6fyZavRIumvjhaCL753+TVkGKGhpPeyrJG2JftD0T9q4GF00KjefsQ+PQNDdWQaQ==" }, "@types/shelljs": { - "version": "0.8.0", - "resolved": "https://registry.npmjs.org/@types/shelljs/-/shelljs-0.8.0.tgz", - "integrity": "sha512-vs1hCC8RxLHRu2bwumNyYRNrU3o8BtZhLysH5A4I98iYmA2APl6R3uNQb5ihl+WiwH0xdC9LLO+vRrXLs/Kyxg==", + "version": "0.8.2", + "resolved": "https://registry.npmjs.org/@types/shelljs/-/shelljs-0.8.2.tgz", + "integrity": "sha512-vVp7BCQn0yUQgpiohrdxAhHdm/bTlXshB4HG3LEBq1PgvjKiyeYHohIPIv0QBt/jipb140iMS5Xy1iR6qKovKw==", "dev": true, "requires": { "@types/glob": "*", @@ -1319,175 +1415,181 @@ "resolved": "https://registry.npmjs.org/@types/text-encoding-utf-8/-/text-encoding-utf-8-1.0.1.tgz", "integrity": "sha512-GpIEYaS+yNfYqpowLLziiY42pyaL+lThd/wMh6tTubaKuG4IRkXqqyxK7Nddn3BvpUg2+go3Gv/jbXvAFMRjiQ==" }, + "@types/whatwg-streams": { + "version": "0.0.7", + "resolved": "https://registry.npmjs.org/@types/whatwg-streams/-/whatwg-streams-0.0.7.tgz", + "integrity": 
"sha512-6sDiSEP6DWcY2ZolsJ2s39ZmsoGQ7KVwBDI3sESQsEm9P2dHTcqnDIHRZFRNtLCzWp7hCFGqYbw5GyfpQnJ01A==", + "dev": true + }, "@webassemblyjs/ast": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/ast/-/ast-1.7.10.tgz", - "integrity": "sha512-wTUeaByYN2EA6qVqhbgavtGc7fLTOx0glG2IBsFlrFG51uXIGlYBTyIZMf4SPLo3v1bgV/7lBN3l7Z0R6Hswew==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/ast/-/ast-1.7.11.tgz", + "integrity": "sha512-ZEzy4vjvTzScC+SH8RBssQUawpaInUdMTYwYYLh54/s8TuT0gBLuyUnppKsVyZEi876VmmStKsUs28UxPgdvrA==", "dev": true, "requires": { - "@webassemblyjs/helper-module-context": "1.7.10", - "@webassemblyjs/helper-wasm-bytecode": "1.7.10", - "@webassemblyjs/wast-parser": "1.7.10" + "@webassemblyjs/helper-module-context": "1.7.11", + "@webassemblyjs/helper-wasm-bytecode": "1.7.11", + "@webassemblyjs/wast-parser": "1.7.11" } }, "@webassemblyjs/floating-point-hex-parser": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/floating-point-hex-parser/-/floating-point-hex-parser-1.7.10.tgz", - "integrity": "sha512-gMsGbI6I3p/P1xL2UxqhNh1ga2HCsx5VBB2i5VvJFAaqAjd2PBTRULc3BpTydabUQEGlaZCzEUQhLoLG7TvEYQ==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/floating-point-hex-parser/-/floating-point-hex-parser-1.7.11.tgz", + "integrity": "sha512-zY8dSNyYcgzNRNT666/zOoAyImshm3ycKdoLsyDw/Bwo6+/uktb7p4xyApuef1dwEBo/U/SYQzbGBvV+nru2Xg==", "dev": true }, "@webassemblyjs/helper-api-error": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-api-error/-/helper-api-error-1.7.10.tgz", - "integrity": "sha512-DoYRlPWtuw3yd5BOr9XhtrmB6X1enYF0/54yNvQWGXZEPDF5PJVNI7zQ7gkcKfTESzp8bIBWailaFXEK/jjCsw==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-api-error/-/helper-api-error-1.7.11.tgz", + "integrity": "sha512-7r1qXLmiglC+wPNkGuXCvkmalyEstKVwcueZRP2GNC2PAvxbLYwLLPr14rcdJaE4UtHxQKfFkuDFuv91ipqvXg==", "dev": true }, "@webassemblyjs/helper-buffer": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-buffer/-/helper-buffer-1.7.10.tgz", - "integrity": "sha512-+RMU3dt/dPh4EpVX4u5jxsOlw22tp3zjqE0m3ftU2tsYxnPULb4cyHlgaNd2KoWuwasCQqn8Mhr+TTdbtj3LlA==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-buffer/-/helper-buffer-1.7.11.tgz", + "integrity": "sha512-MynuervdylPPh3ix+mKZloTcL06P8tenNH3sx6s0qE8SLR6DdwnfgA7Hc9NSYeob2jrW5Vql6GVlsQzKQCa13w==", "dev": true }, "@webassemblyjs/helper-code-frame": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-code-frame/-/helper-code-frame-1.7.10.tgz", - "integrity": "sha512-UiytbpKAULOEab2hUZK2ywXen4gWJVrgxtwY3Kn+eZaaSWaRM8z/7dAXRSoamhKFiBh1uaqxzE/XD9BLlug3gw==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-code-frame/-/helper-code-frame-1.7.11.tgz", + "integrity": "sha512-T8ESC9KMXFTXA5urJcyor5cn6qWeZ4/zLPyWeEXZ03hj/x9weSokGNkVCdnhSabKGYWxElSdgJ+sFa9G/RdHNw==", "dev": true, "requires": { - "@webassemblyjs/wast-printer": "1.7.10" + "@webassemblyjs/wast-printer": "1.7.11" } }, "@webassemblyjs/helper-fsm": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-fsm/-/helper-fsm-1.7.10.tgz", - "integrity": "sha512-w2vDtUK9xeSRtt5+RnnlRCI7wHEvLjF0XdnxJpgx+LJOvklTZPqWkuy/NhwHSLP19sm9H8dWxKeReMR7sCkGZA==", + "version": "1.7.11", + "resolved": 
"https://registry.npmjs.org/@webassemblyjs/helper-fsm/-/helper-fsm-1.7.11.tgz", + "integrity": "sha512-nsAQWNP1+8Z6tkzdYlXT0kxfa2Z1tRTARd8wYnc/e3Zv3VydVVnaeePgqUzFrpkGUyhUUxOl5ML7f1NuT+gC0A==", "dev": true }, "@webassemblyjs/helper-module-context": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-module-context/-/helper-module-context-1.7.10.tgz", - "integrity": "sha512-yE5x/LzZ3XdPdREmJijxzfrf+BDRewvO0zl8kvORgSWmxpRrkqY39KZSq6TSgIWBxkK4SrzlS3BsMCv2s1FpsQ==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-module-context/-/helper-module-context-1.7.11.tgz", + "integrity": "sha512-JxfD5DX8Ygq4PvXDucq0M+sbUFA7BJAv/GGl9ITovqE+idGX+J3QSzJYz+LwQmL7fC3Rs+utvWoJxDb6pmC0qg==", "dev": true }, "@webassemblyjs/helper-wasm-bytecode": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-wasm-bytecode/-/helper-wasm-bytecode-1.7.10.tgz", - "integrity": "sha512-u5qy4SJ/OrxKxZqJ9N3qH4ZQgHaAzsopsYwLvoWJY6Q33r8PhT3VPyNMaJ7ZFoqzBnZlCcS/0f4Sp8WBxylXfg==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-wasm-bytecode/-/helper-wasm-bytecode-1.7.11.tgz", + "integrity": "sha512-cMXeVS9rhoXsI9LLL4tJxBgVD/KMOKXuFqYb5oCJ/opScWpkCMEz9EJtkonaNcnLv2R3K5jIeS4TRj/drde1JQ==", "dev": true }, "@webassemblyjs/helper-wasm-section": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-wasm-section/-/helper-wasm-section-1.7.10.tgz", - "integrity": "sha512-Ecvww6sCkcjatcyctUrn22neSJHLN/TTzolMGG/N7S9rpbsTZ8c6Bl98GpSpV77EvzNijiNRHBG0+JO99qKz6g==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-wasm-section/-/helper-wasm-section-1.7.11.tgz", + "integrity": "sha512-8ZRY5iZbZdtNFE5UFunB8mmBEAbSI3guwbrsCl4fWdfRiAcvqQpeqd5KHhSWLL5wuxo53zcaGZDBU64qgn4I4Q==", "dev": true, "requires": { - "@webassemblyjs/ast": "1.7.10", - "@webassemblyjs/helper-buffer": "1.7.10", - "@webassemblyjs/helper-wasm-bytecode": "1.7.10", - "@webassemblyjs/wasm-gen": "1.7.10" + "@webassemblyjs/ast": "1.7.11", + "@webassemblyjs/helper-buffer": "1.7.11", + "@webassemblyjs/helper-wasm-bytecode": "1.7.11", + "@webassemblyjs/wasm-gen": "1.7.11" } }, "@webassemblyjs/ieee754": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/ieee754/-/ieee754-1.7.10.tgz", - "integrity": "sha512-HRcWcY+YWt4+s/CvQn+vnSPfRaD4KkuzQFt5MNaELXXHSjelHlSEA8ZcqT69q0GTIuLWZ6JaoKar4yWHVpZHsQ==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/ieee754/-/ieee754-1.7.11.tgz", + "integrity": "sha512-Mmqx/cS68K1tSrvRLtaV/Lp3NZWzXtOHUW2IvDvl2sihAwJh4ACE0eL6A8FvMyDG9abes3saB6dMimLOs+HMoQ==", "dev": true, "requires": { "@xtuc/ieee754": "^1.2.0" } }, "@webassemblyjs/leb128": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/leb128/-/leb128-1.7.10.tgz", - "integrity": "sha512-og8MciYlA8hvzCLR71hCuZKPbVBfLQeHv7ImKZ4nlyxrYbG7uJHYtHiHu6OV9SqrGuD03H/HtXC4Bgdjfm9FHw==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/leb128/-/leb128-1.7.11.tgz", + "integrity": "sha512-vuGmgZjjp3zjcerQg+JA+tGOncOnJLWVkt8Aze5eWQLwTQGNgVLcyOTqgSCxWTR4J42ijHbBxnuRaL1Rv7XMdw==", "dev": true, "requires": { "@xtuc/long": "4.2.1" } }, "@webassemblyjs/utf8": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/utf8/-/utf8-1.7.10.tgz", - "integrity": "sha512-Ng6Pxv6siyZp635xCSnH3mKmIFgqWPCcGdoo0GBYgyGdxu7cUj4agV7Uu1a8REP66UYUFXJLudeGgd4RvuJAnQ==", + "version": 
"1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/utf8/-/utf8-1.7.11.tgz", + "integrity": "sha512-C6GFkc7aErQIAH+BMrIdVSmW+6HSe20wg57HEC1uqJP8E/xpMjXqQUxkQw07MhNDSDcGpxI9G5JSNOQCqJk4sA==", "dev": true }, "@webassemblyjs/wasm-edit": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-edit/-/wasm-edit-1.7.10.tgz", - "integrity": "sha512-e9RZFQlb+ZuYcKRcW9yl+mqX/Ycj9+3/+ppDI8nEE/NCY6FoK8f3dKBcfubYV/HZn44b+ND4hjh+4BYBt+sDnA==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-edit/-/wasm-edit-1.7.11.tgz", + "integrity": "sha512-FUd97guNGsCZQgeTPKdgxJhBXkUbMTY6hFPf2Y4OedXd48H97J+sOY2Ltaq6WGVpIH8o/TGOVNiVz/SbpEMJGg==", "dev": true, "requires": { - "@webassemblyjs/ast": "1.7.10", - "@webassemblyjs/helper-buffer": "1.7.10", - "@webassemblyjs/helper-wasm-bytecode": "1.7.10", - "@webassemblyjs/helper-wasm-section": "1.7.10", - "@webassemblyjs/wasm-gen": "1.7.10", - "@webassemblyjs/wasm-opt": "1.7.10", - "@webassemblyjs/wasm-parser": "1.7.10", - "@webassemblyjs/wast-printer": "1.7.10" + "@webassemblyjs/ast": "1.7.11", + "@webassemblyjs/helper-buffer": "1.7.11", + "@webassemblyjs/helper-wasm-bytecode": "1.7.11", + "@webassemblyjs/helper-wasm-section": "1.7.11", + "@webassemblyjs/wasm-gen": "1.7.11", + "@webassemblyjs/wasm-opt": "1.7.11", + "@webassemblyjs/wasm-parser": "1.7.11", + "@webassemblyjs/wast-printer": "1.7.11" } }, "@webassemblyjs/wasm-gen": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-gen/-/wasm-gen-1.7.10.tgz", - "integrity": "sha512-M0lb6cO2Y0PzDye/L39PqwV+jvO+2YxEG5ax+7dgq7EwXdAlpOMx1jxyXJTScQoeTpzOPIb+fLgX/IkLF8h2yw==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-gen/-/wasm-gen-1.7.11.tgz", + "integrity": "sha512-U/KDYp7fgAZX5KPfq4NOupK/BmhDc5Kjy2GIqstMhvvdJRcER/kUsMThpWeRP8BMn4LXaKhSTggIJPOeYHwISA==", "dev": true, "requires": { - "@webassemblyjs/ast": "1.7.10", - "@webassemblyjs/helper-wasm-bytecode": "1.7.10", - "@webassemblyjs/ieee754": "1.7.10", - "@webassemblyjs/leb128": "1.7.10", - "@webassemblyjs/utf8": "1.7.10" + "@webassemblyjs/ast": "1.7.11", + "@webassemblyjs/helper-wasm-bytecode": "1.7.11", + "@webassemblyjs/ieee754": "1.7.11", + "@webassemblyjs/leb128": "1.7.11", + "@webassemblyjs/utf8": "1.7.11" } }, "@webassemblyjs/wasm-opt": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-opt/-/wasm-opt-1.7.10.tgz", - "integrity": "sha512-R66IHGCdicgF5ZliN10yn5HaC7vwYAqrSVJGjtJJQp5+QNPBye6heWdVH/at40uh0uoaDN/UVUfXK0gvuUqtVg==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-opt/-/wasm-opt-1.7.11.tgz", + "integrity": "sha512-XynkOwQyiRidh0GLua7SkeHvAPXQV/RxsUeERILmAInZegApOUAIJfRuPYe2F7RcjOC9tW3Cb9juPvAC/sCqvg==", "dev": true, "requires": { - "@webassemblyjs/ast": "1.7.10", - "@webassemblyjs/helper-buffer": "1.7.10", - "@webassemblyjs/wasm-gen": "1.7.10", - "@webassemblyjs/wasm-parser": "1.7.10" + "@webassemblyjs/ast": "1.7.11", + "@webassemblyjs/helper-buffer": "1.7.11", + "@webassemblyjs/wasm-gen": "1.7.11", + "@webassemblyjs/wasm-parser": "1.7.11" } }, "@webassemblyjs/wasm-parser": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-parser/-/wasm-parser-1.7.10.tgz", - "integrity": "sha512-AEv8mkXVK63n/iDR3T693EzoGPnNAwKwT3iHmKJNBrrALAhhEjuPzo/lTE4U7LquEwyvg5nneSNdTdgrBaGJcA==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-parser/-/wasm-parser-1.7.11.tgz", + 
"integrity": "sha512-6lmXRTrrZjYD8Ng8xRyvyXQJYUQKYSXhJqXOBLw24rdiXsHAOlvw5PhesjdcaMadU/pyPQOJ5dHreMjBxwnQKg==", "dev": true, "requires": { - "@webassemblyjs/ast": "1.7.10", - "@webassemblyjs/helper-api-error": "1.7.10", - "@webassemblyjs/helper-wasm-bytecode": "1.7.10", - "@webassemblyjs/ieee754": "1.7.10", - "@webassemblyjs/leb128": "1.7.10", - "@webassemblyjs/utf8": "1.7.10" + "@webassemblyjs/ast": "1.7.11", + "@webassemblyjs/helper-api-error": "1.7.11", + "@webassemblyjs/helper-wasm-bytecode": "1.7.11", + "@webassemblyjs/ieee754": "1.7.11", + "@webassemblyjs/leb128": "1.7.11", + "@webassemblyjs/utf8": "1.7.11" } }, "@webassemblyjs/wast-parser": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/wast-parser/-/wast-parser-1.7.10.tgz", - "integrity": "sha512-YTPEtOBljkCL0VjDp4sHe22dAYSm3ZwdJ9+2NTGdtC7ayNvuip1wAhaAS8Zt9Q6SW9E5Jf5PX7YE3XWlrzR9cw==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wast-parser/-/wast-parser-1.7.11.tgz", + "integrity": "sha512-lEyVCg2np15tS+dm7+JJTNhNWq9yTZvi3qEhAIIOaofcYlUp0UR5/tVqOwa/gXYr3gjwSZqw+/lS9dscyLelbQ==", "dev": true, "requires": { - "@webassemblyjs/ast": "1.7.10", - "@webassemblyjs/floating-point-hex-parser": "1.7.10", - "@webassemblyjs/helper-api-error": "1.7.10", - "@webassemblyjs/helper-code-frame": "1.7.10", - "@webassemblyjs/helper-fsm": "1.7.10", + "@webassemblyjs/ast": "1.7.11", + "@webassemblyjs/floating-point-hex-parser": "1.7.11", + "@webassemblyjs/helper-api-error": "1.7.11", + "@webassemblyjs/helper-code-frame": "1.7.11", + "@webassemblyjs/helper-fsm": "1.7.11", "@xtuc/long": "4.2.1" } }, "@webassemblyjs/wast-printer": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/wast-printer/-/wast-printer-1.7.10.tgz", - "integrity": "sha512-mJ3QKWtCchL1vhU/kZlJnLPuQZnlDOdZsyP0bbLWPGdYsQDnSBvyTLhzwBA3QAMlzEL9V4JHygEmK6/OTEyytA==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wast-printer/-/wast-printer-1.7.11.tgz", + "integrity": "sha512-m5vkAsuJ32QpkdkDOUPGSltrg8Cuk3KBx4YrmAGQwCZPRdUHXxG4phIOuuycLemHFr74sWL9Wthqss4fzdzSwg==", "dev": true, "requires": { - "@webassemblyjs/ast": "1.7.10", - "@webassemblyjs/wast-parser": "1.7.10", + "@webassemblyjs/ast": "1.7.11", + "@webassemblyjs/wast-parser": "1.7.11", "@xtuc/long": "4.2.1" } }, @@ -1532,13 +1634,10 @@ "dev": true }, "acorn-dynamic-import": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/acorn-dynamic-import/-/acorn-dynamic-import-3.0.0.tgz", - "integrity": "sha512-zVWV8Z8lislJoOKKqdNMOB+s6+XV5WERty8MnKBeFgwA+19XJjJHs2RP5dzM57FftIs+jQnRToLiWazKr6sSWg==", - "dev": true, - "requires": { - "acorn": "^5.0.0" - } + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/acorn-dynamic-import/-/acorn-dynamic-import-4.0.0.tgz", + "integrity": "sha512-d3OEjQV4ROpoflsnUA8HozoIR504TFxNivYEUi6uwz0IYhBkTDXGuWlNdMtybRt3nqVx/L6XqMt0FxkXuWKZhw==", + "dev": true }, "acorn-globals": { "version": "4.3.0", @@ -1551,17 +1650,17 @@ }, "dependencies": { "acorn": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/acorn/-/acorn-6.0.2.tgz", - "integrity": "sha512-GXmKIvbrN3TV7aVqAzVFaMW8F8wzVX7voEBRO3bDA64+EX37YSayggRJP5Xig6HYHBkWKpFg9W5gg6orklubhg==", + "version": "6.0.5", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-6.0.5.tgz", + "integrity": "sha512-i33Zgp3XWtmZBMNvCr4azvOFeWVw1Rk6p3hfi3LUDvIFraOMywb1kAtrbi+med14m4Xfpqm3zRZMT+c0FNE7kg==", "dev": true } } }, "acorn-walk": { - "version": "6.1.0", - "resolved": 
"https://registry.npmjs.org/acorn-walk/-/acorn-walk-6.1.0.tgz", - "integrity": "sha512-ugTb7Lq7u4GfWSqqpwE0bGyoBZNMTok/zDBXxfEG0QM50jNlGhIWjRC1pPN7bvV1anhF+bs+/gNcRw+o55Evbg==", + "version": "6.1.1", + "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-6.1.1.tgz", + "integrity": "sha512-OtUw6JUTgxA2QoqqmrmQ7F2NYqiBPi/L2jqHyFtllhOUvXYQXf0Z1CYUinIfyT4bTCGmrA7gX9FvHA81uzCoVw==", "dev": true }, "agent-base": { @@ -1583,34 +1682,37 @@ } }, "ajv": { - "version": "5.5.2", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-5.5.2.tgz", - "integrity": "sha1-c7Xuyj+rZT49P5Qis0GtQiBdyWU=", + "version": "6.7.0", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.7.0.tgz", + "integrity": "sha512-RZXPviBTtfmtka9n9sy1N5M5b82CbxWIR6HIis4s3WQTXDJamc/0gpCWNGz6EWdWp4DOfjzJfhz/AS9zVPjjWg==", "dev": true, "requires": { - "co": "^4.6.0", - "fast-deep-equal": "^1.0.0", + "fast-deep-equal": "^2.0.1", "fast-json-stable-stringify": "^2.0.0", - "json-schema-traverse": "^0.3.0" + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" } }, "ajv-errors": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/ajv-errors/-/ajv-errors-1.0.0.tgz", - "integrity": "sha1-7PAh+hCP0X37Xms4Py3SM+Mf/Fk=", + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/ajv-errors/-/ajv-errors-1.0.1.tgz", + "integrity": "sha512-DCRfO/4nQ+89p/RK43i8Ezd41EqdGIU4ld7nGF8OQ14oc/we5rEntLCUa7+jrn3nn83BosfwZA0wb4pon2o8iQ==", "dev": true }, "ajv-keywords": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/ajv-keywords/-/ajv-keywords-3.2.0.tgz", - "integrity": "sha1-6GuBnGAs+IIa1jdBNpjx3sAhhHo=", + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/ajv-keywords/-/ajv-keywords-3.3.0.tgz", + "integrity": "sha512-CMzN9S62ZOO4sA/mJZIO4S++ZM7KFWzH3PPWkveLhy4OZ9i1/VatgwWMD46w/XbGCBy7Ye0gCk+Za6mmyfKK7g==", "dev": true }, "ansi-colors": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-2.0.5.tgz", - "integrity": "sha512-yAdfUZ+c2wetVNIFsNRn44THW+Lty6S5TwMpUfLA/UaGhiXbBv/F8E60/1hMLd0cnF/CDoWH8vzVaI5bAcHCjw==", - "dev": true + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-1.1.0.tgz", + "integrity": "sha512-SFKX67auSNoVR38N3L+nvsPjOE0bybKTYbkf5tRvushrAPQ9V75huw0ZxBkKVeRU9kqH3d6HA4xTckbwZ4ixmA==", + "dev": true, + "requires": { + "ansi-wrap": "^0.1.0" + } }, "ansi-escapes": { "version": "3.1.0", @@ -1647,20 +1749,14 @@ "integrity": "sha1-qCJQ3bABXponyoLoLqYDu/pF768=", "dev": true }, - "any-observable": { - "version": "0.3.0", - "resolved": "https://registry.npmjs.org/any-observable/-/any-observable-0.3.0.tgz", - "integrity": "sha512-/FQM1EDkTsf63Ub2C6O7GuYFDsSXUwsaZDurV0np41ocwq0jthUAYCmhBX9f+KwlaCgIuWyr/4WlUQUBfKfZog==", - "dev": true - }, "anymatch": { - "version": "1.3.2", - "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-1.3.2.tgz", - "integrity": "sha512-0XNayC8lTHQ2OI8aljNCN3sSx6hsr/1+rlcDAotXJR7C1oZZHCNsfpbKwMjRA3Uqb5tF1Rae2oloTr4xpq+WjA==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-2.0.0.tgz", + "integrity": "sha512-5teOsQWABXHHBFP9y3skS5P3d/WfWXpv3FUpy+LorMrNYaT9pI4oLMQX7jzQ2KklNpGpWHzdCXTDT2Y3XGlZBw==", "dev": true, "requires": { - "micromatch": "^2.1.5", - "normalize-path": "^2.0.0" + "micromatch": "^3.1.4", + "normalize-path": "^2.1.1" } }, "append-buffer": { @@ -1703,6 +1799,12 @@ "readable-stream": "^2.0.6" } }, + "arg": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/arg/-/arg-4.1.0.tgz", + "integrity": 
"sha512-ZWc51jO3qegGkVh8Hwpv636EkbesNV5ZNQPCtRa+0qytRYPEs9IYT9qITY9buezqUH5uqyzlWLcufrzU2rffdg==", + "dev": true + }, "argparse": { "version": "1.0.10", "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", @@ -1722,13 +1824,10 @@ } }, "arr-diff": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-2.0.0.tgz", - "integrity": "sha1-jzuCf5Vai9ZpaX5KQlasPOrjVs8=", - "dev": true, - "requires": { - "arr-flatten": "^1.0.1" - } + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz", + "integrity": "sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=", + "dev": true }, "arr-filter": { "version": "1.1.2", @@ -1892,9 +1991,9 @@ "dev": true }, "array-unique": { - "version": "0.2.1", - "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.2.1.tgz", - "integrity": "sha1-odl8yvy8JiXMcPrc6zalDFiwGlM=", + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.3.2.tgz", + "integrity": "sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg=", "dev": true }, "arrify": { @@ -1946,7 +2045,7 @@ }, "util": { "version": "0.10.3", - "resolved": "http://registry.npmjs.org/util/-/util-0.10.3.tgz", + "resolved": "https://registry.npmjs.org/util/-/util-0.10.3.tgz", "integrity": "sha1-evsa/lCAUkZInj23/g7TeTNqwPk=", "dev": true, "requires": { @@ -1992,14 +2091,6 @@ "once": "^1.3.2", "process-nextick-args": "^1.0.7", "stream-exhaust": "^1.0.1" - }, - "dependencies": { - "process-nextick-args": { - "version": "1.0.7", - "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-1.0.7.tgz", - "integrity": "sha1-FQ4gt1ZZCtP5EJPyWk8q2L/zC6M=", - "dev": true - } } }, "async-each": { @@ -2066,7 +2157,7 @@ }, "chalk": { "version": "1.1.3", - "resolved": "http://registry.npmjs.org/chalk/-/chalk-1.1.3.tgz", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-1.1.3.tgz", "integrity": "sha1-qBFcVeSnAv5NFQq9OHKCKn4J/Jg=", "dev": true, "requires": { @@ -2159,7 +2250,7 @@ }, "babel-plugin-istanbul": { "version": "4.1.6", - "resolved": "http://registry.npmjs.org/babel-plugin-istanbul/-/babel-plugin-istanbul-4.1.6.tgz", + "resolved": "https://registry.npmjs.org/babel-plugin-istanbul/-/babel-plugin-istanbul-4.1.6.tgz", "integrity": "sha512-PWP9FQ1AhZhS01T/4qLSKoHGY/xvkZdVBGlKM/HuxxS3+sC66HhTNR7+MpbO/so/cz/wY94MeSWJuP1hXIPfwQ==", "dev": true, "requires": { @@ -2167,6 +2258,17 @@ "find-up": "^2.1.0", "istanbul-lib-instrument": "^1.10.1", "test-exclude": "^4.2.1" + }, + "dependencies": { + "find-up": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-2.1.0.tgz", + "integrity": "sha1-RdG35QbHF93UgndaK3eSCjwMV6c=", + "dev": true, + "requires": { + "locate-path": "^2.0.0" + } + } } }, "babel-plugin-jest-hoist": { @@ -2177,32 +2279,10 @@ }, "babel-plugin-syntax-object-rest-spread": { "version": "6.13.0", - "resolved": "http://registry.npmjs.org/babel-plugin-syntax-object-rest-spread/-/babel-plugin-syntax-object-rest-spread-6.13.0.tgz", + "resolved": "https://registry.npmjs.org/babel-plugin-syntax-object-rest-spread/-/babel-plugin-syntax-object-rest-spread-6.13.0.tgz", "integrity": "sha1-/WU28rzhODb/o6VFjEkDpZe7O/U=", "dev": true }, - "babel-plugin-transform-es2015-modules-commonjs": { - "version": "6.26.2", - "resolved": "https://registry.npmjs.org/babel-plugin-transform-es2015-modules-commonjs/-/babel-plugin-transform-es2015-modules-commonjs-6.26.2.tgz", - "integrity": "sha512-CV9ROOHEdrjcwhIaJNBGMBCodN+1cfkwtM1SbUHmvyy35KGT7fohbpOxkE2uLz1o6odKK2Ck/tz47z+VqQfi9Q==", - "dev": 
true, - "requires": { - "babel-plugin-transform-strict-mode": "^6.24.1", - "babel-runtime": "^6.26.0", - "babel-template": "^6.26.0", - "babel-types": "^6.26.0" - } - }, - "babel-plugin-transform-strict-mode": { - "version": "6.24.1", - "resolved": "https://registry.npmjs.org/babel-plugin-transform-strict-mode/-/babel-plugin-transform-strict-mode-6.24.1.tgz", - "integrity": "sha1-1fr3qleKZbvlkc9e2uBKDGcCB1g=", - "dev": true, - "requires": { - "babel-runtime": "^6.22.0", - "babel-types": "^6.24.1" - } - }, "babel-preset-jest": { "version": "23.2.0", "resolved": "https://registry.npmjs.org/babel-preset-jest/-/babel-preset-jest-23.2.0.tgz", @@ -2361,18 +2441,6 @@ "is-data-descriptor": "^1.0.0", "kind-of": "^6.0.2" } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true } } }, @@ -2391,12 +2459,6 @@ "tweetnacl": "^0.14.3" } }, - "beeper": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/beeper/-/beeper-1.1.1.tgz", - "integrity": "sha1-5tXqjF2tABMEpwsiY4RH9pyy+Ak=", - "dev": true - }, "benchmark": { "version": "2.1.4", "resolved": "https://registry.npmjs.org/benchmark/-/benchmark-2.1.4.tgz", @@ -2408,11 +2470,24 @@ } }, "big.js": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/big.js/-/big.js-3.2.0.tgz", - "integrity": "sha512-+hN/Zh2D08Mx65pZ/4g5bsmNiZUuChDiQfTUQ7qJr4/kuopCr88xZsAXv6mBoZEsUI4OuGHlX59qE94K2mMW8Q==", + "version": "5.2.2", + "resolved": "https://registry.npmjs.org/big.js/-/big.js-5.2.2.tgz", + "integrity": "sha512-vyL2OymJxmarO8gxMr0mhChsO9QGwhynfuu4+MHTAW6czfq9humCB7rKpUjDd9YUiDPU4mzpyupFSvOClAwbmQ==", "dev": true }, + "bin-links": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/bin-links/-/bin-links-1.1.2.tgz", + "integrity": "sha512-8eEHVgYP03nILphilltWjeIjMbKyJo3wvp9K816pHbhP301ismzw15mxAAEVQ/USUwcP++1uNrbERbp8lOA6Fg==", + "dev": true, + "requires": { + "bluebird": "^3.5.0", + "cmd-shim": "^2.0.2", + "gentle-fs": "^2.0.0", + "graceful-fs": "^4.1.11", + "write-file-atomic": "^2.3.0" + } + }, "binary-extensions": { "version": "1.12.0", "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-1.12.0.tgz", @@ -2429,9 +2504,9 @@ } }, "bluebird": { - "version": "3.5.2", - "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.5.2.tgz", - "integrity": "sha512-dhHTWMI7kMx5whMQntl7Vr9C6BvV10lFXDAasnqnrMYhXVCzzk6IO9Fo2L75jXHT07WrOngL1WDXOp+yYS91Yg==", + "version": "3.5.3", + "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.5.3.tgz", + "integrity": "sha512-/qKPUQlaW1OyR51WeCPBvRnAlnZFUJkCSG5HzGnuIqhgyJtF+T94lFnn33eiazjRm2LAHVy2guNnaq48X9SJuw==", "dev": true }, "bn.js": { @@ -2451,14 +2526,32 @@ } }, "braces": { - "version": "1.8.5", - "resolved": "https://registry.npmjs.org/braces/-/braces-1.8.5.tgz", - "integrity": "sha1-uneWLhLf+WnWt2cR6RS3N4V79qc=", + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/braces/-/braces-2.3.2.tgz", + "integrity": "sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w==", "dev": true, "requires": { - "expand-range": "^1.8.1", - "preserve": "^0.2.0", - "repeat-element": "^1.1.2" + "arr-flatten": "^1.1.0", + "array-unique": "^0.3.2", + 
"extend-shallow": "^2.0.1", + "fill-range": "^4.0.0", + "isobject": "^3.0.1", + "repeat-element": "^1.1.2", + "snapdragon": "^0.8.1", + "snapdragon-node": "^2.0.1", + "split-string": "^3.0.2", + "to-regex": "^3.0.1" + }, + "dependencies": { + "extend-shallow": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", + "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", + "dev": true, + "requires": { + "is-extendable": "^0.1.0" + } + } } }, "brorand": { @@ -2492,7 +2585,7 @@ }, "browserify-aes": { "version": "1.2.0", - "resolved": "http://registry.npmjs.org/browserify-aes/-/browserify-aes-1.2.0.tgz", + "resolved": "https://registry.npmjs.org/browserify-aes/-/browserify-aes-1.2.0.tgz", "integrity": "sha512-+7CHXqGuspUn/Sl5aO7Ea0xWGAtETPXNSAjHo48JfLdPWcMng33Xe4znFvQweqc/uzk5zSOI3H52CYnjCfb5hA==", "dev": true, "requires": { @@ -2529,7 +2622,7 @@ }, "browserify-rsa": { "version": "4.0.1", - "resolved": "http://registry.npmjs.org/browserify-rsa/-/browserify-rsa-4.0.1.tgz", + "resolved": "https://registry.npmjs.org/browserify-rsa/-/browserify-rsa-4.0.1.tgz", "integrity": "sha1-IeCr+vbyApzy+vsTNWenAdQTVSQ=", "dev": true, "requires": { @@ -2561,6 +2654,15 @@ "pako": "~1.0.5" } }, + "bs-logger": { + "version": "0.2.6", + "resolved": "https://registry.npmjs.org/bs-logger/-/bs-logger-0.2.6.tgz", + "integrity": "sha512-pd8DCoxmbgc7hyPKOvxtqNcjYoOsABPQdcCUjGp3d42VR2CX1ORhk2A87oqqu5R1kk+76nsxZupkmyd+MVtCog==", + "dev": true, + "requires": { + "fast-json-stable-stringify": "2.x" + } + }, "bser": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/bser/-/bser-2.0.0.tgz", @@ -2572,7 +2674,7 @@ }, "buffer": { "version": "4.9.1", - "resolved": "http://registry.npmjs.org/buffer/-/buffer-4.9.1.tgz", + "resolved": "https://registry.npmjs.org/buffer/-/buffer-4.9.1.tgz", "integrity": "sha1-bRu2AbB6TvztlwlBMgkwJ8lbwpg=", "dev": true, "requires": { @@ -2630,32 +2732,47 @@ "dev": true }, "cacache": { - "version": "11.2.0", - "resolved": "https://registry.npmjs.org/cacache/-/cacache-11.2.0.tgz", - "integrity": "sha512-IFWl6lfK6wSeYCHUXh+N1lY72UDrpyrYQJNIVQf48paDuWbv5RbAtJYf/4gUQFObTCHZwdZ5sI8Iw7nqwP6nlQ==", + "version": "11.3.2", + "resolved": "https://registry.npmjs.org/cacache/-/cacache-11.3.2.tgz", + "integrity": "sha512-E0zP4EPGDOaT2chM08Als91eYnf8Z+eH1awwwVsngUmgppfM5jjJ8l3z5vO5p5w/I3LsiXawb1sW0VY65pQABg==", "dev": true, "requires": { - "bluebird": "^3.5.1", - "chownr": "^1.0.1", - "figgy-pudding": "^3.1.0", - "glob": "^7.1.2", - "graceful-fs": "^4.1.11", - "lru-cache": "^4.1.3", + "bluebird": "^3.5.3", + "chownr": "^1.1.1", + "figgy-pudding": "^3.5.1", + "glob": "^7.1.3", + "graceful-fs": "^4.1.15", + "lru-cache": "^5.1.1", "mississippi": "^3.0.0", "mkdirp": "^0.5.1", "move-concurrently": "^1.0.1", "promise-inflight": "^1.0.1", "rimraf": "^2.6.2", - "ssri": "^6.0.0", - "unique-filename": "^1.1.0", + "ssri": "^6.0.1", + "unique-filename": "^1.1.1", "y18n": "^4.0.0" }, "dependencies": { + "lru-cache": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz", + "integrity": "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==", + "dev": true, + "requires": { + "yallist": "^3.0.2" + } + }, "y18n": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/y18n/-/y18n-4.0.0.tgz", "integrity": "sha512-r9S/ZyXu/Xu9q1tYlpsLIsa3EeLXXk0VwlxqTcFRfg9EhMW+17kbt9G0NrgCmhGb5vT2hyhJZLfDGx+7+5Uj/w==", "dev": true + }, + "yallist": { + "version": "3.0.3", + "resolved": 
"https://registry.npmjs.org/yallist/-/yallist-3.0.3.tgz", + "integrity": "sha512-S+Zk8DEWE6oKpV+vI3qWkaK+jSbIK86pCwe2IF/xwIpQ8jEuxpw9NyaGjmp9+BoJv5FV2piqCDcoCtStppiq2A==", + "dev": true } } }, @@ -2674,14 +2791,6 @@ "to-object-path": "^0.3.0", "union-value": "^1.0.0", "unset-value": "^1.0.0" - }, - "dependencies": { - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - } } }, "call-me-maybe": { @@ -2690,6 +2799,24 @@ "integrity": "sha1-JtII6onje1y95gJQoV8DHBak1ms=", "dev": true }, + "caller-callsite": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/caller-callsite/-/caller-callsite-2.0.0.tgz", + "integrity": "sha1-hH4PzgoiN1CpoCfFSzNzGtMVQTQ=", + "dev": true, + "requires": { + "callsites": "^2.0.0" + } + }, + "caller-path": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/caller-path/-/caller-path-2.0.0.tgz", + "integrity": "sha1-Ro+DBE42mrIBD6xfBs7uFbsssfQ=", + "dev": true, + "requires": { + "caller-callsite": "^2.0.0" + } + }, "callsites": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/callsites/-/callsites-2.0.0.tgz", @@ -2697,9 +2824,9 @@ "dev": true }, "camelcase": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-2.1.1.tgz", - "integrity": "sha1-fB0W1nmhu+WcoCys7PsBHiAfWh8=", + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-3.0.0.tgz", + "integrity": "sha1-MvxLn82vhF/N9+c7uXysImHwqwo=", "dev": true }, "camelcase-keys": { @@ -2737,9 +2864,9 @@ "dev": true }, "chalk": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.1.tgz", - "integrity": "sha512-ObN6h1v2fTJSmUXoS3nMQ92LbDK9be4TV+6G+omQlGJFdcUX5heKi1LZ1YnRMIgwTLEj3E24bT6tYni50rlCfQ==", + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz", + "integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==", "requires": { "ansi-styles": "^3.2.1", "escape-string-regexp": "^1.0.5", @@ -2753,20 +2880,24 @@ "dev": true }, "chokidar": { - "version": "1.7.0", - "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-1.7.0.tgz", - "integrity": "sha1-eY5ol3gVHIB2tLNg5e3SjNortGg=", + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-2.0.4.tgz", + "integrity": "sha512-z9n7yt9rOvIJrMhvDtDictKrkFHeihkNl6uWMmZlmL6tJtX9Cs+87oK+teBx+JIgzvbX3yZHT3eF8vpbDxHJXQ==", "dev": true, "requires": { - "anymatch": "^1.3.0", + "anymatch": "^2.0.0", "async-each": "^1.0.0", - "fsevents": "^1.0.0", - "glob-parent": "^2.0.0", + "braces": "^2.3.0", + "fsevents": "^1.2.2", + "glob-parent": "^3.1.0", "inherits": "^2.0.1", "is-binary-path": "^1.0.0", - "is-glob": "^2.0.0", + "is-glob": "^4.0.0", + "lodash.debounce": "^4.0.8", + "normalize-path": "^2.1.1", "path-is-absolute": "^1.0.0", - "readdirp": "^2.0.0" + "readdirp": "^2.0.0", + "upath": "^1.0.5" } }, "chownr": { @@ -2820,12 +2951,6 @@ "requires": { "is-descriptor": "^0.1.0" } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true } } }, @@ -2838,16 +2963,6 @@ "restore-cursor": "^2.0.0" } }, - "cli-truncate": { - "version": "0.2.1", - "resolved": "https://registry.npmjs.org/cli-truncate/-/cli-truncate-0.2.1.tgz", - "integrity": "sha1-nxXPuwcFAFNpIWxiasfQWrkN1XQ=", - "dev": true, - "requires": { - 
"slice-ansi": "0.0.4", - "string-width": "^1.0.1" - } - }, "cli-width": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/cli-width/-/cli-width-2.2.0.tgz", @@ -2892,6 +3007,14 @@ "inherits": "^2.0.1", "process-nextick-args": "^2.0.0", "readable-stream": "^2.3.5" + }, + "dependencies": { + "process-nextick-args": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.0.tgz", + "integrity": "sha512-MtEC1TqN0EU5nephaJ4rAtThHtC86dNN9qCuEhtshvpVBkAW5ZO7BASN9REnF9eoXGcRub+pFuKEpOHE+HbEMw==", + "dev": true + } } }, "cmd-shim": { @@ -2925,17 +3048,6 @@ "arr-map": "^2.0.2", "for-own": "^1.0.0", "make-iterator": "^1.0.0" - }, - "dependencies": { - "for-own": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/for-own/-/for-own-1.0.0.tgz", - "integrity": "sha1-xjMy9BXO3EsE2/5wz4NklMU8tEs=", - "dev": true, - "requires": { - "for-in": "^1.0.1" - } - } } }, "collection-visit": { @@ -3098,9 +3210,9 @@ "dev": true }, "conventional-changelog-angular": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/conventional-changelog-angular/-/conventional-changelog-angular-5.0.1.tgz", - "integrity": "sha512-q4ylJ68fWZDdrFC9z4zKcf97HW6hp7Mo2YlqD4owfXhecFKy/PJCU/1oVFF4TqochchChqmZ0Vb0e0g8/MKNlA==", + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/conventional-changelog-angular/-/conventional-changelog-angular-5.0.2.tgz", + "integrity": "sha512-yx7m7lVrXmt4nKWQgWZqxSALEiAKZhOAcbxdUaU9575mB0CzXVbgrgpfSnSP7OqWDUTYGD0YVJ0MSRdyOPgAwA==", "dev": true, "requires": { "compare-func": "^1.3.1", @@ -3108,48 +3220,109 @@ } }, "conventional-changelog-core": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/conventional-changelog-core/-/conventional-changelog-core-3.1.0.tgz", - "integrity": "sha512-bcZkcFXkqVgG2W8m/1wjlp2wn/BKDcrPgw3/mvSEQtzs8Pax8JbAPFpEQReHY92+EKNNXC67wLA8y2xcNx0rDA==", + "version": "3.1.5", + "resolved": "https://registry.npmjs.org/conventional-changelog-core/-/conventional-changelog-core-3.1.5.tgz", + "integrity": "sha512-iwqAotS4zk0wA4S84YY1JCUG7X3LxaRjJxuUo6GI4dZuIy243j5nOg/Ora35ExT4DOiw5dQbMMQvw2SUjh6moQ==", "dev": true, "requires": { - "conventional-changelog-writer": "^4.0.0", - "conventional-commits-parser": "^3.0.0", + "conventional-changelog-writer": "^4.0.2", + "conventional-commits-parser": "^3.0.1", "dateformat": "^3.0.0", "get-pkg-repo": "^1.0.0", - "git-raw-commits": "^2.0.0", + "git-raw-commits": "2.0.0", "git-remote-origin-url": "^2.0.0", - "git-semver-tags": "^2.0.0", + "git-semver-tags": "^2.0.2", "lodash": "^4.2.1", "normalize-package-data": "^2.3.5", "q": "^1.5.1", - "read-pkg": "^1.1.0", - "read-pkg-up": "^1.0.1", + "read-pkg": "^3.0.0", + "read-pkg-up": "^3.0.0", "through2": "^2.0.0" }, "dependencies": { - "dateformat": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/dateformat/-/dateformat-3.0.3.tgz", - "integrity": "sha512-jyCETtSl3VMZMWeRo7iY1FL19ges1t55hMo5yaam4Jrsm5EPL89UQkoQRyiI+Yf4k8r2ZpdngkV8hr1lIdjb3Q==", - "dev": true - } - } - }, - "conventional-changelog-preset-loader": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/conventional-changelog-preset-loader/-/conventional-changelog-preset-loader-2.0.1.tgz", - "integrity": "sha512-HiSfhXNzAzG9klIqJaA97MMiNBR4js+53g4Px0k7tgKeCNVXmrDrm+CY+nIqcmG5NVngEPf8rAr7iji1TWW7zg==", - "dev": true - }, - "conventional-changelog-writer": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/conventional-changelog-writer/-/conventional-changelog-writer-4.0.0.tgz", - 
"integrity": "sha512-hMZPe0AQ6Bi05epeK/7hz80xxk59nPA5z/b63TOHq2wigM0/akreOc8N4Jam5b9nFgKWX1e9PdPv2ewgW6bcfg==", - "dev": true, - "requires": { - "compare-func": "^1.3.1", - "conventional-commits-filter": "^2.0.0", + "find-up": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-2.1.0.tgz", + "integrity": "sha1-RdG35QbHF93UgndaK3eSCjwMV6c=", + "dev": true, + "requires": { + "locate-path": "^2.0.0" + } + }, + "load-json-file": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/load-json-file/-/load-json-file-4.0.0.tgz", + "integrity": "sha1-L19Fq5HjMhYjT9U62rZo607AmTs=", + "dev": true, + "requires": { + "graceful-fs": "^4.1.2", + "parse-json": "^4.0.0", + "pify": "^3.0.0", + "strip-bom": "^3.0.0" + } + }, + "parse-json": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-4.0.0.tgz", + "integrity": "sha1-vjX1Qlvh9/bHRxhPmKeIy5lHfuA=", + "dev": true, + "requires": { + "error-ex": "^1.3.1", + "json-parse-better-errors": "^1.0.1" + } + }, + "path-type": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/path-type/-/path-type-3.0.0.tgz", + "integrity": "sha512-T2ZUsdZFHgA3u4e5PfPbjd7HDDpxPnQb5jN0SrDsjNSuVXHJqtwTnWqG0B1jZrgmJ/7lj1EmVIByWt1gxGkWvg==", + "dev": true, + "requires": { + "pify": "^3.0.0" + } + }, + "read-pkg": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/read-pkg/-/read-pkg-3.0.0.tgz", + "integrity": "sha1-nLxoaXj+5l0WwA4rGcI3/Pbjg4k=", + "dev": true, + "requires": { + "load-json-file": "^4.0.0", + "normalize-package-data": "^2.3.2", + "path-type": "^3.0.0" + } + }, + "read-pkg-up": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/read-pkg-up/-/read-pkg-up-3.0.0.tgz", + "integrity": "sha1-PtSWaF26D4/hGNBpHcUfSh/5bwc=", + "dev": true, + "requires": { + "find-up": "^2.0.0", + "read-pkg": "^3.0.0" + } + }, + "strip-bom": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-3.0.0.tgz", + "integrity": "sha1-IzTBjpx1n3vdVv3vfprj1YjmjtM=", + "dev": true + } + } + }, + "conventional-changelog-preset-loader": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/conventional-changelog-preset-loader/-/conventional-changelog-preset-loader-2.0.2.tgz", + "integrity": "sha512-pBY+qnUoJPXAXXqVGwQaVmcye05xi6z231QM98wHWamGAmu/ghkBprQAwmF5bdmyobdVxiLhPY3PrCfSeUNzRQ==", + "dev": true + }, + "conventional-changelog-writer": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/conventional-changelog-writer/-/conventional-changelog-writer-4.0.2.tgz", + "integrity": "sha512-d8/FQY/fix2xXEBUhOo8u3DCbyEw3UOQgYHxLsPDw+wHUDma/GQGAGsGtoH876WyNs32fViHmTOUrgRKVLvBug==", + "dev": true, + "requires": { + "compare-func": "^1.3.1", + "conventional-commits-filter": "^2.0.1", "dateformat": "^3.0.0", "handlebars": "^4.0.2", "json-stringify-safe": "^5.0.1", @@ -3158,20 +3331,12 @@ "semver": "^5.5.0", "split": "^1.0.0", "through2": "^2.0.0" - }, - "dependencies": { - "dateformat": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/dateformat/-/dateformat-3.0.3.tgz", - "integrity": "sha512-jyCETtSl3VMZMWeRo7iY1FL19ges1t55hMo5yaam4Jrsm5EPL89UQkoQRyiI+Yf4k8r2ZpdngkV8hr1lIdjb3Q==", - "dev": true - } } }, "conventional-commits-filter": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/conventional-commits-filter/-/conventional-commits-filter-2.0.0.tgz", - "integrity": "sha512-Cfl0j1/NquB/TMVx7Wrmyq7uRM+/rPQbtVVGwzfkhZ6/yH6fcMmP0Q/9044TBZPTNdGzm46vXFXL14wbET0/Mg==", + "version": "2.0.1", + "resolved": 
"https://registry.npmjs.org/conventional-commits-filter/-/conventional-commits-filter-2.0.1.tgz", + "integrity": "sha512-92OU8pz/977udhBjgPEbg3sbYzIxMDFTlQT97w7KdhR9igNqdJvy8smmedAAgn4tPiqseFloKkrVfbXCVd+E7A==", "dev": true, "requires": { "is-subset": "^0.1.1", @@ -3179,9 +3344,9 @@ } }, "conventional-commits-parser": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/conventional-commits-parser/-/conventional-commits-parser-3.0.0.tgz", - "integrity": "sha512-GWh71U26BLWgMykCp+VghZ4s64wVbtseECcKQ/PvcPZR2cUnz+FUc2J9KjxNl7/ZbCxST8R03c9fc+Vi0umS9Q==", + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/conventional-commits-parser/-/conventional-commits-parser-3.0.1.tgz", + "integrity": "sha512-P6U5UOvDeidUJ8ebHVDIoXzI7gMlQ1OF/id6oUvp8cnZvOXMt1n8nYl74Ey9YMn0uVQtxmCtjPQawpsssBWtGg==", "dev": true, "requires": { "JSONStream": "^1.0.4", @@ -3194,17 +3359,17 @@ } }, "conventional-recommended-bump": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/conventional-recommended-bump/-/conventional-recommended-bump-4.0.1.tgz", - "integrity": "sha512-9waJvW01TUs4HQJ3khwGSSlTlKsY+5u7OrxHL+oWEoGNvaNO/0qL6qqnhS3J0Fq9fNKA9bmlf5cOXjCQoW+I4Q==", + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/conventional-recommended-bump/-/conventional-recommended-bump-4.0.4.tgz", + "integrity": "sha512-9mY5Yoblq+ZMqJpBzgS+RpSq+SUfP2miOR3H/NR9drGf08WCrY9B6HAGJZEm6+ThsVP917VHAahSOjM6k1vhPg==", "dev": true, "requires": { "concat-stream": "^1.6.0", - "conventional-changelog-preset-loader": "^2.0.1", - "conventional-commits-filter": "^2.0.0", - "conventional-commits-parser": "^3.0.0", - "git-raw-commits": "^2.0.0", - "git-semver-tags": "^2.0.0", + "conventional-changelog-preset-loader": "^2.0.2", + "conventional-commits-filter": "^2.0.1", + "conventional-commits-parser": "^3.0.1", + "git-raw-commits": "2.0.0", + "git-semver-tags": "^2.0.2", "meow": "^4.0.0", "q": "^1.5.1" } @@ -3249,9 +3414,9 @@ } }, "core-js": { - "version": "2.5.7", - "resolved": "https://registry.npmjs.org/core-js/-/core-js-2.5.7.tgz", - "integrity": "sha512-RszJCAxg/PP6uzXVXL6BsxSXx/B05oJAQ2vkJRjyjrEcNVycaqOmNb5OTxZPE3xa5gwZduqza6L9JOCenh/Ecw==", + "version": "2.6.3", + "resolved": "https://registry.npmjs.org/core-js/-/core-js-2.6.3.tgz", + "integrity": "sha512-l00tmFFZOBHtYhN4Cz7k32VM7vTn3rE2ANjQDxdEN6zmXZ/xq1jQuutnmHvMG1ZJ7xd72+TA5YpUK8wz3rWsfQ==", "dev": true }, "core-util-is": { @@ -3261,11 +3426,12 @@ "dev": true }, "cosmiconfig": { - "version": "5.0.6", - "resolved": "https://registry.npmjs.org/cosmiconfig/-/cosmiconfig-5.0.6.tgz", - "integrity": "sha512-6DWfizHriCrFWURP1/qyhsiFvYdlJzbCzmtFWh744+KyWsJo5+kPzUZZaMRSSItoYc0pxFX7gEO7ZC1/gN/7AQ==", + "version": "5.0.7", + "resolved": "https://registry.npmjs.org/cosmiconfig/-/cosmiconfig-5.0.7.tgz", + "integrity": "sha512-PcLqxTKiDmNT6pSpy4N6KtuPwb53W+2tzNvwOZw0WH9N6O0vLIBq0x8aj8Oj75ere4YcGi48bDFCL+3fRJdlNA==", "dev": true, "requires": { + "import-fresh": "^2.0.0", "is-directory": "^0.3.1", "js-yaml": "^3.9.0", "parse-json": "^4.0.0" @@ -3297,25 +3463,6 @@ "request": "^2.85.0" } }, - "cpx": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/cpx/-/cpx-1.5.0.tgz", - "integrity": "sha1-GFvgGFEdhycN7czCkxceN2VauI8=", - "dev": true, - "requires": { - "babel-runtime": "^6.9.2", - "chokidar": "^1.6.0", - "duplexer": "^0.1.1", - "glob": "^7.0.5", - "glob2base": "^0.0.12", - "minimatch": "^3.0.2", - "mkdirp": "^0.5.1", - "resolve": "^1.1.7", - "safe-buffer": "^5.0.1", - "shell-quote": "^1.6.1", - "subarg": "^1.0.0" - } - }, "create-ecdh": { 
"version": "4.0.3", "resolved": "https://registry.npmjs.org/create-ecdh/-/create-ecdh-4.0.3.tgz", @@ -3328,7 +3475,7 @@ }, "create-hash": { "version": "1.2.0", - "resolved": "http://registry.npmjs.org/create-hash/-/create-hash-1.2.0.tgz", + "resolved": "https://registry.npmjs.org/create-hash/-/create-hash-1.2.0.tgz", "integrity": "sha512-z00bCGNHDG8mHAkP7CtT1qVu+bFQUPjYq/4Iv3C3kWjTFV10zIjfSoeqXo9Asws8gwSHDGj/hl2u4OGIjapeCg==", "dev": true, "requires": { @@ -3341,7 +3488,7 @@ }, "create-hmac": { "version": "1.1.7", - "resolved": "http://registry.npmjs.org/create-hmac/-/create-hmac-1.1.7.tgz", + "resolved": "https://registry.npmjs.org/create-hmac/-/create-hmac-1.1.7.tgz", "integrity": "sha512-MJG9liiZ+ogc4TzUwuvbER1JRdgvUFSB5+VR/g5h82fGaIRWMWddtKBHi7/sVhfjQZ6SehlyhvQYrcYkaUIpLg==", "dev": true, "requires": { @@ -3471,13 +3618,13 @@ } }, "data-urls": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/data-urls/-/data-urls-1.0.1.tgz", - "integrity": "sha512-0HdcMZzK6ubMUnsMmQmG0AcLQPvbvb47R0+7CCZQCYgcd8OUWG91CG7sM6GoXgjz+WLl4ArFzHtBMy/QqSF4eg==", + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/data-urls/-/data-urls-1.1.0.tgz", + "integrity": "sha512-YTWYI9se1P55u58gL5GkQHW4P6VJBJ5iBT+B5a7i2Tjadhv52paJG0qHX4A0OR6/t52odI64KP2YvFpkDOi3eQ==", "dev": true, "requires": { "abab": "^2.0.0", - "whatwg-mimetype": "^2.1.0", + "whatwg-mimetype": "^2.2.0", "whatwg-url": "^7.0.0" }, "dependencies": { @@ -3494,12 +3641,6 @@ } } }, - "date-fns": { - "version": "1.29.0", - "resolved": "https://registry.npmjs.org/date-fns/-/date-fns-1.29.0.tgz", - "integrity": "sha512-lbTXWZ6M20cWH8N9S6afb0SBm6tMk+uUg6z3MqHPKE9atmsY3kJkTm8vKe93izJ2B2+q5MV990sM2CHgtAZaOw==", - "dev": true - }, "date-now": { "version": "0.1.4", "resolved": "https://registry.npmjs.org/date-now/-/date-now-0.1.4.tgz", @@ -3507,9 +3648,9 @@ "dev": true }, "dateformat": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/dateformat/-/dateformat-2.2.0.tgz", - "integrity": "sha1-QGXiATz5+5Ft39gu+1Bq1MZ2kGI=", + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/dateformat/-/dateformat-3.0.3.tgz", + "integrity": "sha512-jyCETtSl3VMZMWeRo7iY1FL19ges1t55hMo5yaam4Jrsm5EPL89UQkoQRyiI+Yf4k8r2ZpdngkV8hr1lIdjb3Q==", "dev": true }, "debug": { @@ -3698,18 +3839,6 @@ "is-data-descriptor": "^1.0.0", "kind-of": "^6.0.2" } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true } } }, @@ -3788,7 +3917,7 @@ }, "diffie-hellman": { "version": "5.0.3", - "resolved": "http://registry.npmjs.org/diffie-hellman/-/diffie-hellman-5.0.3.tgz", + "resolved": "https://registry.npmjs.org/diffie-hellman/-/diffie-hellman-5.0.3.tgz", "integrity": "sha512-kqag/Nl+f3GwyK25fhUMYj81BUOrZ9IuJsjIcDE5icNM9FJHAVm3VcUDxdLPoQtTuUylWm6ZIknYJwwaPxsUzg==", "dev": true, "requires": { @@ -3844,45 +3973,10 @@ }, "duplexer": { "version": "0.1.1", - "resolved": "http://registry.npmjs.org/duplexer/-/duplexer-0.1.1.tgz", + "resolved": "https://registry.npmjs.org/duplexer/-/duplexer-0.1.1.tgz", "integrity": "sha1-rOb/gIwc5mtX0ev5eXessCM0z8E=", "dev": true }, - "duplexer2": { - "version": "0.0.2", - "resolved": "https://registry.npmjs.org/duplexer2/-/duplexer2-0.0.2.tgz", - "integrity": 
"sha1-xhTc9n4vsUmVqRcR5aYX6KYKMds=", - "dev": true, - "requires": { - "readable-stream": "~1.1.9" - }, - "dependencies": { - "isarray": { - "version": "0.0.1", - "resolved": "https://registry.npmjs.org/isarray/-/isarray-0.0.1.tgz", - "integrity": "sha1-ihis/Kmo9Bd+Cav8YDiTmwXR7t8=", - "dev": true - }, - "readable-stream": { - "version": "1.1.14", - "resolved": "http://registry.npmjs.org/readable-stream/-/readable-stream-1.1.14.tgz", - "integrity": "sha1-fPTFTvZI44EwhMY23SB54WbAgdk=", - "dev": true, - "requires": { - "core-util-is": "~1.0.0", - "inherits": "~2.0.1", - "isarray": "0.0.1", - "string_decoder": "~0.10.x" - } - }, - "string_decoder": { - "version": "0.10.31", - "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-0.10.31.tgz", - "integrity": "sha1-YuIDvEF2bGwoyfyEMB2rHFMQ+pQ=", - "dev": true - } - } - }, "duplexify": { "version": "3.6.1", "resolved": "https://registry.npmjs.org/duplexify/-/duplexify-3.6.1.tgz", @@ -3915,12 +4009,6 @@ "safer-buffer": "^2.1.0" } }, - "elegant-spinner": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/elegant-spinner/-/elegant-spinner-1.0.1.tgz", - "integrity": "sha1-2wQ1IcldfjA/2PNFvtwzSc+wcp4=", - "dev": true - }, "elliptic": { "version": "6.4.1", "resolved": "https://registry.npmjs.org/elliptic/-/elliptic-6.4.1.tgz", @@ -3996,16 +4084,17 @@ } }, "es-abstract": { - "version": "1.12.0", - "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.12.0.tgz", - "integrity": "sha512-C8Fx/0jFmV5IPoMOFPA9P9G5NtqW+4cOPit3MIuvR2t7Ag2K15EJTpxnHAYTzL+aYQJIESYeXZmDBfOBE1HcpA==", + "version": "1.13.0", + "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.13.0.tgz", + "integrity": "sha512-vDZfg/ykNxQVwup/8E1BZhVzFfBxs9NqMzGcvIJrqg5k2/5Za2bWo40dK2J1pgLngZ7c+Shh8lwYtLGyrwPutg==", "dev": true, "requires": { - "es-to-primitive": "^1.1.1", + "es-to-primitive": "^1.2.0", "function-bind": "^1.1.1", - "has": "^1.0.1", - "is-callable": "^1.1.3", - "is-regex": "^1.0.4" + "has": "^1.0.3", + "is-callable": "^1.1.4", + "is-regex": "^1.0.4", + "object-keys": "^1.0.12" } }, "es-to-primitive": { @@ -4020,9 +4109,9 @@ } }, "es5-ext": { - "version": "0.10.46", - "resolved": "https://registry.npmjs.org/es5-ext/-/es5-ext-0.10.46.tgz", - "integrity": "sha512-24XxRvJXNFwEMpJb3nOkiRJKRoupmjYmOPVlI65Qy2SrtxwOTB+g6ODjBKOtwEHbYrhWRty9xxOWLNdClT2djw==", + "version": "0.10.47", + "resolved": "https://registry.npmjs.org/es5-ext/-/es5-ext-0.10.47.tgz", + "integrity": "sha512-/1TItLfj+TTfWoeRcDn/0FbGV6SNo4R+On2GGVucPU/j3BWnXE2Co8h8CTo4Tu34gFJtnmwS9xiScKs4EjZhdw==", "dev": true, "requires": { "es6-iterator": "~2.0.3", @@ -4133,6 +4222,12 @@ "estraverse": "^4.1.1" } }, + "esm": { + "version": "3.1.4", + "resolved": "https://registry.npmjs.org/esm/-/esm-3.1.4.tgz", + "integrity": "sha512-GScwIz0110RTNzBmAQEdqaAYkD9zVhj2Jo+jeizjIcdyTw+C6S0Zv/dlPYgfF41hRTu2f1vQYliubzIkusx2gA==", + "dev": true + }, "esprima": { "version": "4.0.1", "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", @@ -4170,26 +4265,10 @@ "es5-ext": "~0.10.14" } }, - "event-stream": { - "version": "3.3.6", - "resolved": "https://registry.npmjs.org/event-stream/-/event-stream-3.3.6.tgz", - "integrity": "sha512-dGXNg4F/FgVzlApjzItL+7naHutA3fDqbV/zAZqDDlXTjiMnQmZKu+prImWKszeBM5UQeGvAl3u1wBiKeDh61g==", - "dev": true, - "requires": { - "duplexer": "^0.1.1", - "flatmap-stream": "^0.1.0", - "from": "^0.1.7", - "map-stream": "0.0.7", - "pause-stream": "^0.0.11", - "split": "^1.0.1", - "stream-combiner": "^0.2.2", - "through": "^2.3.8" - } - }, "events": { - 
"version": "1.1.1", - "resolved": "http://registry.npmjs.org/events/-/events-1.1.1.tgz", - "integrity": "sha1-nr23Y1rQmccNzEwqH1AEKI6L2SQ=", + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/events/-/events-3.0.0.tgz", + "integrity": "sha512-Dc381HFWJzEOhQ+d8pkNon++bk9h6cdAoAj4iE6Q4y6xgTzySWXlKn05/TVNpjnfRqi/X0EpJEJohPjNI3zpVA==", "dev": true }, "evp_bytestokey": { @@ -4232,19 +4311,39 @@ "integrity": "sha1-BjJjj42HfMghB9MKD/8aF8uhzQw=", "dev": true }, - "exit-hook": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/exit-hook/-/exit-hook-1.1.1.tgz", - "integrity": "sha1-8FyiM7SMBdVP/wd2XfhQfpXAL/g=", - "dev": true - }, "expand-brackets": { - "version": "0.1.5", - "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-0.1.5.tgz", - "integrity": "sha1-3wcoTjQqgHzXM6xa9yQR5YHRF3s=", + "version": "2.1.4", + "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-2.1.4.tgz", + "integrity": "sha1-t3c14xXOMPa27/D4OwQVGiJEliI=", "dev": true, "requires": { - "is-posix-bracket": "^0.1.0" + "debug": "^2.3.3", + "define-property": "^0.2.5", + "extend-shallow": "^2.0.1", + "posix-character-classes": "^0.1.0", + "regex-not": "^1.0.0", + "snapdragon": "^0.8.1", + "to-regex": "^3.0.1" + }, + "dependencies": { + "define-property": { + "version": "0.2.5", + "resolved": "https://registry.npmjs.org/define-property/-/define-property-0.2.5.tgz", + "integrity": "sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=", + "dev": true, + "requires": { + "is-descriptor": "^0.1.0" + } + }, + "extend-shallow": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", + "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", + "dev": true, + "requires": { + "is-extendable": "^0.1.0" + } + } } }, "expand-range": { @@ -4254,6 +4353,48 @@ "dev": true, "requires": { "fill-range": "^2.1.0" + }, + "dependencies": { + "fill-range": { + "version": "2.2.4", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-2.2.4.tgz", + "integrity": "sha512-cnrcCbj01+j2gTG921VZPnHbjmdAf8oQV/iGeV2kZxGSyfYjjTyY79ErsK1WJWMpw6DaApEX72binqJE+/d+5Q==", + "dev": true, + "requires": { + "is-number": "^2.1.0", + "isobject": "^2.0.0", + "randomatic": "^3.0.0", + "repeat-element": "^1.1.2", + "repeat-string": "^1.5.2" + } + }, + "is-number": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/is-number/-/is-number-2.1.0.tgz", + "integrity": "sha1-Afy7s5NGOlSPL0ZszhbezknbkI8=", + "dev": true, + "requires": { + "kind-of": "^3.0.2" + } + }, + "isobject": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/isobject/-/isobject-2.1.0.tgz", + "integrity": "sha1-8GVWEJaj8dou9GJy+BXIQNh+DIk=", + "dev": true, + "requires": { + "isarray": "1.0.0" + } + }, + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + } } }, "expand-tilde": { @@ -4318,262 +4459,37 @@ } }, "extglob": { - "version": "0.3.2", - "resolved": "https://registry.npmjs.org/extglob/-/extglob-0.3.2.tgz", - "integrity": "sha1-Lhj/PS9JqydlzskCPwEdqo2DSaE=", - "dev": true, - "requires": { - "is-extglob": "^1.0.0" - } - }, - "extsprintf": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/extsprintf/-/extsprintf-1.3.0.tgz", - "integrity": "sha1-lpGEQOMEGnpBT4xS48V06zw+HgU=", - "dev": true - }, - "fancy-log": { - "version": "1.3.2", - "resolved": 
"https://registry.npmjs.org/fancy-log/-/fancy-log-1.3.2.tgz", - "integrity": "sha1-9BEl49hPLn2JpD0G2VjI94vha+E=", - "dev": true, - "requires": { - "ansi-gray": "^0.1.1", - "color-support": "^1.1.3", - "time-stamp": "^1.0.0" - } - }, - "fast-deep-equal": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-1.1.0.tgz", - "integrity": "sha1-wFNHeBfIa1HaqFPIHgWbcz0CNhQ=", - "dev": true - }, - "fast-glob": { - "version": "2.2.3", - "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-2.2.3.tgz", - "integrity": "sha512-NiX+JXjnx43RzvVFwRWfPKo4U+1BrK5pJPsHQdKMlLoFHrrGktXglQhHliSihWAq+m1z6fHk3uwGHrtRbS9vLA==", + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/extglob/-/extglob-2.0.4.tgz", + "integrity": "sha512-Nmb6QXkELsuBr24CJSkilo6UHHgbekK5UiZgfE6UHD3Eb27YC6oD+bhcT+tJ6cl8dmsgdQxnWlcry8ksBIBLpw==", "dev": true, "requires": { - "@mrmlnc/readdir-enhanced": "^2.2.1", - "@nodelib/fs.stat": "^1.0.1", - "glob-parent": "^3.1.0", - "is-glob": "^4.0.0", - "merge2": "^1.2.1", - "micromatch": "^3.1.10" + "array-unique": "^0.3.2", + "define-property": "^1.0.0", + "expand-brackets": "^2.1.4", + "extend-shallow": "^2.0.1", + "fragment-cache": "^0.2.1", + "regex-not": "^1.0.0", + "snapdragon": "^0.8.1", + "to-regex": "^3.0.1" }, "dependencies": { - "arr-diff": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz", - "integrity": "sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=", - "dev": true - }, - "array-unique": { - "version": "0.3.2", - "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.3.2.tgz", - "integrity": "sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg=", - "dev": true - }, - "braces": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-2.3.2.tgz", - "integrity": "sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w==", + "define-property": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/define-property/-/define-property-1.0.0.tgz", + "integrity": "sha1-dp66rz9KY6rTr56NMEybvnm/sOY=", "dev": true, "requires": { - "arr-flatten": "^1.1.0", - "array-unique": "^0.3.2", - "extend-shallow": "^2.0.1", - "fill-range": "^4.0.0", - "isobject": "^3.0.1", - "repeat-element": "^1.1.2", - "snapdragon": "^0.8.1", - "snapdragon-node": "^2.0.1", - "split-string": "^3.0.2", - "to-regex": "^3.0.1" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "expand-brackets": { - "version": "2.1.4", - "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-2.1.4.tgz", - "integrity": "sha1-t3c14xXOMPa27/D4OwQVGiJEliI=", - "dev": true, - "requires": { - "debug": "^2.3.3", - "define-property": "^0.2.5", - "extend-shallow": "^2.0.1", - "posix-character-classes": "^0.1.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "0.2.5", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-0.2.5.tgz", - "integrity": "sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=", - "dev": true, - "requires": { - "is-descriptor": "^0.1.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - 
"dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - }, - "is-accessor-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-0.1.6.tgz", - "integrity": "sha1-qeEss66Nh2cn7u84Q/igiXtcmNY=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-data-descriptor": { - "version": "0.1.4", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz", - "integrity": "sha1-C17mSDiOLIYCgueT8YVv7D8wG1Y=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-0.1.6.tgz", - "integrity": "sha512-avDYr0SB3DwO9zsMov0gKCESFYqCnE4hq/4z3TdUlukEy5t9C0YRq7HLrsN52NAcqXKaepeCD0n+B0arnVG3Hg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^0.1.6", - "is-data-descriptor": "^0.1.4", - "kind-of": "^5.0.0" - } - }, - "kind-of": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-5.1.0.tgz", - "integrity": "sha512-NGEErnH6F2vUuXDh+OlbcKW7/wOcfdRHaZ7VWtqCztfHri/++YKmP51OdWeGPuqCOba6kk2OTe5d02VmTB80Pw==", - "dev": true - } - } - }, - "extglob": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/extglob/-/extglob-2.0.4.tgz", - "integrity": "sha512-Nmb6QXkELsuBr24CJSkilo6UHHgbekK5UiZgfE6UHD3Eb27YC6oD+bhcT+tJ6cl8dmsgdQxnWlcry8ksBIBLpw==", - "dev": true, - "requires": { - "array-unique": "^0.3.2", - "define-property": "^1.0.0", - "expand-brackets": "^2.1.4", - "extend-shallow": "^2.0.1", - "fragment-cache": "^0.2.1", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-1.0.0.tgz", - "integrity": "sha1-dp66rz9KY6rTr56NMEybvnm/sOY=", - "dev": true, - "requires": { - "is-descriptor": "^1.0.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "fill-range": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-4.0.0.tgz", - "integrity": "sha1-1USBHUKPmOsGpj3EAtJAPDKMOPc=", - "dev": true, - "requires": { - "extend-shallow": "^2.0.1", - "is-number": "^3.0.0", - "repeat-string": "^1.6.1", - "to-regex-range": "^2.1.0" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } + "is-descriptor": "^1.0.0" } }, - "glob-parent": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-3.1.0.tgz", - "integrity": "sha1-nmr2KZ2NO9K9QEMIMr0RPfkGxa4=", + "extend-shallow": { + 
"version": "2.0.1", + "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", + "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", "dev": true, "requires": { - "is-glob": "^3.1.0", - "path-dirname": "^1.0.0" - }, - "dependencies": { - "is-glob": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-3.1.0.tgz", - "integrity": "sha1-e6WuJCF4BKxwcHuWkiVnSGzD6Eo=", - "dev": true, - "requires": { - "is-extglob": "^2.1.0" - } - } + "is-extendable": "^0.1.0" } }, "is-accessor-descriptor": { @@ -4604,77 +4520,53 @@ "is-data-descriptor": "^1.0.0", "kind-of": "^6.0.2" } - }, - "is-extglob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", - "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", - "dev": true - }, - "is-glob": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.0.tgz", - "integrity": "sha1-lSHHaEXMJhCoUgPd8ICpWML/q8A=", - "dev": true, - "requires": { - "is-extglob": "^2.1.1" - } - }, - "is-number": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-3.0.0.tgz", - "integrity": "sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true - }, - "micromatch": { - "version": "3.1.10", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-3.1.10.tgz", - "integrity": "sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg==", - "dev": true, - "requires": { - "arr-diff": "^4.0.0", - "array-unique": "^0.3.2", - "braces": "^2.3.1", - "define-property": "^2.0.2", - "extend-shallow": "^3.0.2", - "extglob": "^2.0.4", - "fragment-cache": "^0.2.1", - "kind-of": "^6.0.2", - "nanomatch": "^1.2.9", - "object.pick": "^1.3.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.2" - } } } }, + "extsprintf": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/extsprintf/-/extsprintf-1.3.0.tgz", + "integrity": "sha1-lpGEQOMEGnpBT4xS48V06zw+HgU=", + "dev": true + }, + "fancy-log": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/fancy-log/-/fancy-log-1.3.3.tgz", + "integrity": "sha512-k9oEhlyc0FrVh25qYuSELjr8oxsCoc4/LEZfg2iJJrfEk/tZL9bCoJE47gqAvI2m/AUjluCS4+3I0eTx8n3AEw==", + "dev": true, + "requires": { + "ansi-gray": "^0.1.1", + "color-support": "^1.1.3", + "parse-node-version": "^1.0.0", + "time-stamp": "^1.0.0" + } + }, + "fast-deep-equal": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-2.0.1.tgz", + "integrity": "sha1-ewUhjd+WZ79/Nwv3/bLLFf3Qqkk=", + "dev": true + }, + "fast-extend": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/fast-extend/-/fast-extend-0.0.2.tgz", + "integrity": "sha1-9exCz0C5Rg9SGmOH37Ut7u1nHb0=", + "dev": true + }, + "fast-glob": { + 
"version": "2.2.6", + "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-2.2.6.tgz", + "integrity": "sha512-0BvMaZc1k9F+MeWWMe8pL6YltFzZYcJsYU7D4JyDA6PAczaXvxqQQ/z+mDF7/4Mw01DeUc+i3CTKajnkANkV4w==", + "dev": true, + "requires": { + "@mrmlnc/readdir-enhanced": "^2.2.1", + "@nodelib/fs.stat": "^1.1.2", + "glob-parent": "^3.1.0", + "is-glob": "^4.0.0", + "merge2": "^1.2.3", + "micromatch": "^3.1.10" + } + }, "fast-json-stable-stringify": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.0.0.tgz", @@ -4728,16 +4620,26 @@ } }, "fill-range": { - "version": "2.2.4", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-2.2.4.tgz", - "integrity": "sha512-cnrcCbj01+j2gTG921VZPnHbjmdAf8oQV/iGeV2kZxGSyfYjjTyY79ErsK1WJWMpw6DaApEX72binqJE+/d+5Q==", + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-4.0.0.tgz", + "integrity": "sha1-1USBHUKPmOsGpj3EAtJAPDKMOPc=", "dev": true, "requires": { - "is-number": "^2.1.0", - "isobject": "^2.0.0", - "randomatic": "^3.0.0", - "repeat-element": "^1.1.2", - "repeat-string": "^1.5.2" + "extend-shallow": "^2.0.1", + "is-number": "^3.0.0", + "repeat-string": "^1.6.1", + "to-regex-range": "^2.1.0" + }, + "dependencies": { + "extend-shallow": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", + "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", + "dev": true, + "requires": { + "is-extendable": "^0.1.0" + } + } } }, "find-cache-dir": { @@ -4771,9 +4673,9 @@ } }, "p-limit": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.0.0.tgz", - "integrity": "sha512-fl5s52lI5ahKCernzzIyAP0QAZbGIovtVHGwpcu1Jr/EpzLVDI2myISHwGqK7m8uQFugVWSrbxH7XnhGtvEc+A==", + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.1.0.tgz", + "integrity": "sha512-NhURkNcrVB+8hNfLuysU8enY5xn2KXphsHBaC2YmRNTZRc7RWusw6apSpdEj3jo4CMb6W9nrF6tTnsJsJeyu6g==", "dev": true, "requires": { "p-try": "^2.0.0" @@ -4794,6 +4696,12 @@ "integrity": "sha512-hMp0onDKIajHfIkdRk3P4CdCmErkYAxxDtP3Wx/4nZ3aGlau2VKh3mZpcuFkH27WQkL/3WBCPOktzA9ZOAnMQQ==", "dev": true }, + "path-exists": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-3.0.0.tgz", + "integrity": "sha1-zg6+ql94yxiSXqfYENe1mwEP1RU=", + "dev": true + }, "pkg-dir": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/pkg-dir/-/pkg-dir-3.0.0.tgz", @@ -4805,16 +4713,10 @@ } } }, - "find-index": { - "version": "0.1.1", - "resolved": "https://registry.npmjs.org/find-index/-/find-index-0.1.1.tgz", - "integrity": "sha1-Z101iyyjiS15Whq0cjL4tuLg3eQ=", - "dev": true - }, - "find-parent-dir": { - "version": "0.3.0", - "resolved": "https://registry.npmjs.org/find-parent-dir/-/find-parent-dir-0.3.0.tgz", - "integrity": "sha1-M8RLQpqysvBkYpnF+fcY83b/jVQ=", + "find-npm-prefix": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/find-npm-prefix/-/find-npm-prefix-1.0.2.tgz", + "integrity": "sha512-KEftzJ+H90x6pcKtdXZEPsQse8/y/UnvzRKrOSQFprnrGaFuJ62fVkP34Iu2IYuMvyauCyoLTNkJZgrrGA2wkA==", "dev": true }, "find-replace": { @@ -4827,12 +4729,13 @@ } }, "find-up": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/find-up/-/find-up-2.1.0.tgz", - "integrity": "sha1-RdG35QbHF93UgndaK3eSCjwMV6c=", + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-1.1.2.tgz", + "integrity": "sha1-ay6YIrGizgpgq2TWEOzK1TyyTQ8=", "dev": true, "requires": { 
- "locate-path": "^2.0.0" + "path-exists": "^2.0.0", + "pinkie-promise": "^2.0.0" } }, "findup-sync": { @@ -4847,301 +4750,21 @@ "resolve-dir": "^1.0.1" }, "dependencies": { - "arr-diff": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz", - "integrity": "sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=", - "dev": true - }, - "array-unique": { - "version": "0.3.2", - "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.3.2.tgz", - "integrity": "sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg=", - "dev": true - }, - "braces": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-2.3.2.tgz", - "integrity": "sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w==", - "dev": true, - "requires": { - "arr-flatten": "^1.1.0", - "array-unique": "^0.3.2", - "extend-shallow": "^2.0.1", - "fill-range": "^4.0.0", - "isobject": "^3.0.1", - "repeat-element": "^1.1.2", - "snapdragon": "^0.8.1", - "snapdragon-node": "^2.0.1", - "split-string": "^3.0.2", - "to-regex": "^3.0.1" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "expand-brackets": { - "version": "2.1.4", - "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-2.1.4.tgz", - "integrity": "sha1-t3c14xXOMPa27/D4OwQVGiJEliI=", - "dev": true, - "requires": { - "debug": "^2.3.3", - "define-property": "^0.2.5", - "extend-shallow": "^2.0.1", - "posix-character-classes": "^0.1.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "0.2.5", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-0.2.5.tgz", - "integrity": "sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=", - "dev": true, - "requires": { - "is-descriptor": "^0.1.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - }, - "is-accessor-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-0.1.6.tgz", - "integrity": "sha1-qeEss66Nh2cn7u84Q/igiXtcmNY=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-data-descriptor": { - "version": "0.1.4", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz", - "integrity": "sha1-C17mSDiOLIYCgueT8YVv7D8wG1Y=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-0.1.6.tgz", - "integrity": 
"sha512-avDYr0SB3DwO9zsMov0gKCESFYqCnE4hq/4z3TdUlukEy5t9C0YRq7HLrsN52NAcqXKaepeCD0n+B0arnVG3Hg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^0.1.6", - "is-data-descriptor": "^0.1.4", - "kind-of": "^5.0.0" - } - }, - "kind-of": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-5.1.0.tgz", - "integrity": "sha512-NGEErnH6F2vUuXDh+OlbcKW7/wOcfdRHaZ7VWtqCztfHri/++YKmP51OdWeGPuqCOba6kk2OTe5d02VmTB80Pw==", - "dev": true - } - } - }, - "extglob": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/extglob/-/extglob-2.0.4.tgz", - "integrity": "sha512-Nmb6QXkELsuBr24CJSkilo6UHHgbekK5UiZgfE6UHD3Eb27YC6oD+bhcT+tJ6cl8dmsgdQxnWlcry8ksBIBLpw==", - "dev": true, - "requires": { - "array-unique": "^0.3.2", - "define-property": "^1.0.0", - "expand-brackets": "^2.1.4", - "extend-shallow": "^2.0.1", - "fragment-cache": "^0.2.1", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-1.0.0.tgz", - "integrity": "sha1-dp66rz9KY6rTr56NMEybvnm/sOY=", - "dev": true, - "requires": { - "is-descriptor": "^1.0.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "fill-range": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-4.0.0.tgz", - "integrity": "sha1-1USBHUKPmOsGpj3EAtJAPDKMOPc=", - "dev": true, - "requires": { - "extend-shallow": "^2.0.1", - "is-number": "^3.0.0", - "repeat-string": "^1.6.1", - "to-regex-range": "^2.1.0" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "is-accessor-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-1.0.0.tgz", - "integrity": "sha512-m5hnHTkcVsPfqx3AKlyttIPb7J+XykHvJP2B9bZDjlhLIoEq4XoK64Vg7boZlVWYK6LUY94dYPEE7Lh0ZkZKcQ==", + "is-glob": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-3.1.0.tgz", + "integrity": "sha1-e6WuJCF4BKxwcHuWkiVnSGzD6Eo=", "dev": true, "requires": { - "kind-of": "^6.0.0" - } - }, - "is-data-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-1.0.0.tgz", - "integrity": "sha512-jbRXy1FmtAoCjQkVmIVYwuuqDFUbaOeDjmed1tOGPrsMhtJA4rD9tkgA0F1qJ3gRFRXcHYVkdeaP50Q5rE/jLQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } - }, - "is-descriptor": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-1.0.2.tgz", - "integrity": "sha512-2eis5WqQGV7peooDyLmNEPUrps9+SXX5c9pL3xEB+4e9HnGuDa7mB7kHxHw4CbqS9k1T2hOH3miL8n8WtiYVtg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^1.0.0", - "is-data-descriptor": "^1.0.0", - "kind-of": "^6.0.2" - } - }, - "is-extglob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", - "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", - "dev": true - }, - "is-glob": { - "version": "3.1.0", - "resolved": 
"https://registry.npmjs.org/is-glob/-/is-glob-3.1.0.tgz", - "integrity": "sha1-e6WuJCF4BKxwcHuWkiVnSGzD6Eo=", - "dev": true, - "requires": { - "is-extglob": "^2.1.0" - } - }, - "is-number": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-3.0.0.tgz", - "integrity": "sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true - }, - "micromatch": { - "version": "3.1.10", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-3.1.10.tgz", - "integrity": "sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg==", - "dev": true, - "requires": { - "arr-diff": "^4.0.0", - "array-unique": "^0.3.2", - "braces": "^2.3.1", - "define-property": "^2.0.2", - "extend-shallow": "^3.0.2", - "extglob": "^2.0.4", - "fragment-cache": "^0.2.1", - "kind-of": "^6.0.2", - "nanomatch": "^1.2.9", - "object.pick": "^1.3.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.2" + "is-extglob": "^2.1.0" } } } }, "fined": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/fined/-/fined-1.1.0.tgz", - "integrity": "sha1-s33IRLdqL15wgeiE98CuNE8VNHY=", + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/fined/-/fined-1.1.1.tgz", + "integrity": "sha512-jQp949ZmEbiYHk3gkbdtpJ0G1+kgtLQBNdP5edFP7Fh+WAYceLQz6yO1SBj72Xkg8GVyTB3bBzAYrHJVh5Xd5g==", "dev": true, "requires": { "expand-tilde": "^2.0.2", @@ -5152,9 +4775,9 @@ } }, "flagged-respawn": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/flagged-respawn/-/flagged-respawn-1.0.0.tgz", - "integrity": "sha1-Tnmumy6zi/hrO7Vr8+ClaqX8q9c=", + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/flagged-respawn/-/flagged-respawn-1.0.1.tgz", + "integrity": "sha512-lNaHNVymajmk0OJMBn8fVUAU1BtDeKIqKoVhk4xAALB57aALg6b4W0MfJ/cUE0g9YBXy5XhSlPIpYIJ7HaY/3Q==", "dev": true }, "flatbuffers": { @@ -5162,12 +4785,6 @@ "resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-1.10.2.tgz", "integrity": "sha512-VK7lHZF/corkykjXZ0+dqViI8Wk1YpwPCFN2wrnTs+PMCMG5+uHRvkRW14fuA7Smkhkgx+Dj5UdS3YXktJL+qw==" }, - "flatmap-stream": { - "version": "0.1.1", - "resolved": "https://registry.npmjs.org/flatmap-stream/-/flatmap-stream-0.1.1.tgz", - "integrity": "sha512-lAq4tLbm3sidmdCN8G3ExaxH7cUCtP5mgDvrYowsx84dcYkJJ4I28N7gkxA6+YlSXzaGLJYIDEi9WGfXzMiXdw==", - "dev": true - }, "flush-write-stream": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/flush-write-stream/-/flush-write-stream-1.0.3.tgz", @@ -5185,9 +4802,9 @@ "dev": true }, "for-own": { - "version": "0.1.5", - "resolved": "https://registry.npmjs.org/for-own/-/for-own-0.1.5.tgz", - "integrity": "sha1-UmXGgaTylNq78XyVCbZ2OqhFEM4=", + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/for-own/-/for-own-1.0.0.tgz", + "integrity": "sha1-xjMy9BXO3EsE2/5wz4NklMU8tEs=", "dev": 
true, "requires": { "for-in": "^1.0.1" @@ -5219,12 +4836,6 @@ "map-cache": "^0.2.2" } }, - "from": { - "version": "0.1.7", - "resolved": "https://registry.npmjs.org/from/-/from-0.1.7.tgz", - "integrity": "sha1-g8YK/Fi5xWmXAH7Rp2izqzA6RP4=", - "dev": true - }, "from2": { "version": "2.3.0", "resolved": "https://registry.npmjs.org/from2/-/from2-2.3.0.tgz", @@ -5236,9 +4847,9 @@ } }, "fs-extra": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-7.0.0.tgz", - "integrity": "sha512-EglNDLRpmaTWiD/qraZn6HREAEAHJcJOmxNEYwq6xeMKnVMAy3GUcFB+wXt2C6k4CNvB/mP1y/U3dzvKKj5OtQ==", + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-7.0.1.tgz", + "integrity": "sha512-YJDaCJZEnBmcbw13fvdAM9AwNOJwOzrE4pqMqBq5nFiEqXUqHwlK4B+3pUw6JNvfSPtX05xFHtYy/1ni01eGCw==", "dev": true, "requires": { "graceful-fs": "^4.1.2", @@ -5265,6 +4876,23 @@ "through2": "^2.0.3" } }, + "fs-monkey": { + "version": "0.3.3", + "resolved": "https://registry.npmjs.org/fs-monkey/-/fs-monkey-0.3.3.tgz", + "integrity": "sha512-FNUvuTAJ3CqCQb5ELn+qCbGR/Zllhf2HtwsdAtBi59s1WeCjKMT81fHcSu7dwIskqGVK+MmOrb7VOBlq3/SItw==", + "dev": true + }, + "fs-vacuum": { + "version": "1.2.10", + "resolved": "https://registry.npmjs.org/fs-vacuum/-/fs-vacuum-1.2.10.tgz", + "integrity": "sha1-t2Kb7AekAxolSP35n17PHMizHjY=", + "dev": true, + "requires": { + "graceful-fs": "^4.1.2", + "path-is-inside": "^1.0.1", + "rimraf": "^2.5.2" + } + }, "fs-write-stream-atomic": { "version": "1.0.10", "resolved": "https://registry.npmjs.org/fs-write-stream-atomic/-/fs-write-stream-atomic-1.0.10.tgz", @@ -5284,9 +4912,9 @@ "dev": true }, "fsevents": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-1.2.4.tgz", - "integrity": "sha512-z8H8/diyk76B7q5wg+Ud0+CqzcAF3mBBI/bA5ne5zrRUUIvNkJY//D3BqyH571KuAC4Nr7Rw7CjWX4r0y9DvNg==", + "version": "1.2.7", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-1.2.7.tgz", + "integrity": "sha512-Pxm6sI2MeBD7RdD12RYsqaP0nMiwx8eZBXCa6z2L+mRHm2DYrOYwihmhjpkdjUHwQhslWQjRpEgNq4XvBmaAuw==", "dev": true, "optional": true, "requires": { @@ -5296,28 +4924,24 @@ "dependencies": { "abbrev": { "version": "1.1.1", - "resolved": "https://registry.npmjs.org/abbrev/-/abbrev-1.1.1.tgz", - "integrity": "sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==", + "bundled": true, "dev": true, "optional": true }, "ansi-regex": { "version": "2.1.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-2.1.1.tgz", - "integrity": "sha1-w7M6te42DYbg5ijwRorn7yfWVN8=", + "bundled": true, "dev": true }, "aproba": { "version": "1.2.0", - "resolved": "https://registry.npmjs.org/aproba/-/aproba-1.2.0.tgz", - "integrity": "sha512-Y9J6ZjXtoYh8RnXVCMOU/ttDmk1aBjunq9vO0ta5x85WDQiQfUF9sIPBITdbiiIVcBo03Hi3jMxigBtsddlXRw==", + "bundled": true, "dev": true, "optional": true }, "are-we-there-yet": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/are-we-there-yet/-/are-we-there-yet-1.1.4.tgz", - "integrity": "sha1-u13KOCu5TwXhUZQ3PRb9O6HKEQ0=", + "version": "1.1.5", + "bundled": true, "dev": true, "optional": true, "requires": { @@ -5327,14 +4951,12 @@ }, "balanced-match": { "version": "1.0.0", - "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.0.tgz", - "integrity": "sha1-ibTRmasr7kneFk6gK4nORi1xt2c=", + "bundled": true, "dev": true }, "brace-expansion": { "version": "1.1.11", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", - "integrity": 
"sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "bundled": true, "dev": true, "requires": { "balanced-match": "^1.0.0", @@ -5342,41 +4964,35 @@ } }, "chownr": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.0.1.tgz", - "integrity": "sha1-4qdQQqlVGQi+vSW4Uj1fl2nXkYE=", + "version": "1.1.1", + "bundled": true, "dev": true, "optional": true }, "code-point-at": { "version": "1.1.0", - "resolved": "https://registry.npmjs.org/code-point-at/-/code-point-at-1.1.0.tgz", - "integrity": "sha1-DQcLTQQ6W+ozovGkDi7bPZpMz3c=", + "bundled": true, "dev": true }, "concat-map": { "version": "0.0.1", - "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", - "integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=", + "bundled": true, "dev": true }, "console-control-strings": { "version": "1.1.0", - "resolved": "https://registry.npmjs.org/console-control-strings/-/console-control-strings-1.1.0.tgz", - "integrity": "sha1-PXz0Rk22RG6mRL9LOVB/mFEAjo4=", + "bundled": true, "dev": true }, "core-util-is": { "version": "1.0.2", - "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz", - "integrity": "sha1-tf1UIgqivFq1eqtxQMlAdUUDwac=", + "bundled": true, "dev": true, "optional": true }, "debug": { "version": "2.6.9", - "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", - "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "bundled": true, "dev": true, "optional": true, "requires": { @@ -5384,30 +5000,26 @@ } }, "deep-extend": { - "version": "0.5.1", - "resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.5.1.tgz", - "integrity": "sha512-N8vBdOa+DF7zkRrDCsaOXoCs/E2fJfx9B9MrKnnSiHNh4ws7eSys6YQE4KvT1cecKmOASYQBhbKjeuDD9lT81w==", + "version": "0.6.0", + "bundled": true, "dev": true, "optional": true }, "delegates": { "version": "1.0.0", - "resolved": "https://registry.npmjs.org/delegates/-/delegates-1.0.0.tgz", - "integrity": "sha1-hMbhWbgZBP3KWaDvRM2HDTElD5o=", + "bundled": true, "dev": true, "optional": true }, "detect-libc": { "version": "1.0.3", - "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-1.0.3.tgz", - "integrity": "sha1-+hN8S9aY7fVc1c0CrFWfkaTEups=", + "bundled": true, "dev": true, "optional": true }, "fs-minipass": { "version": "1.2.5", - "resolved": "https://registry.npmjs.org/fs-minipass/-/fs-minipass-1.2.5.tgz", - "integrity": "sha512-JhBl0skXjUPCFH7x6x61gQxrKyXsxB5gcgePLZCwfyCGGsTISMoIeObbrvVeP6Xmyaudw4TT43qV2Gz+iyd2oQ==", + "bundled": true, "dev": true, "optional": true, "requires": { @@ -5416,15 +5028,13 @@ }, "fs.realpath": { "version": "1.0.0", - "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", - "integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8=", + "bundled": true, "dev": true, "optional": true }, "gauge": { "version": "2.7.4", - "resolved": "https://registry.npmjs.org/gauge/-/gauge-2.7.4.tgz", - "integrity": "sha1-LANAXHU4w51+s3sxcCLjJfsBi/c=", + "bundled": true, "dev": true, "optional": true, "requires": { @@ -5439,9 +5049,8 @@ } }, "glob": { - "version": "7.1.2", - "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.2.tgz", - "integrity": "sha512-MJTUg1kjuLeQCJ+ccE4Vpa6kKVXkPYJ2mOCQyUuKLcLQsdrMCpBPUi8qVE6+YuaJkozeA9NusTAw3hLr8Xe5EQ==", + "version": "7.1.3", + "bundled": true, "dev": true, "optional": true, "requires": { @@ -5455,25 +5064,22 @@ }, "has-unicode": { "version": "2.0.1", - "resolved": 
"https://registry.npmjs.org/has-unicode/-/has-unicode-2.0.1.tgz", - "integrity": "sha1-4Ob+aijPUROIVeCG0Wkedx3iqLk=", + "bundled": true, "dev": true, "optional": true }, "iconv-lite": { - "version": "0.4.21", - "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.21.tgz", - "integrity": "sha512-En5V9za5mBt2oUA03WGD3TwDv0MKAruqsuxstbMUZaj9W9k/m1CV/9py3l0L5kw9Bln8fdHQmzHSYtvpvTLpKw==", + "version": "0.4.24", + "bundled": true, "dev": true, "optional": true, "requires": { - "safer-buffer": "^2.1.0" + "safer-buffer": ">= 2.1.2 < 3" } }, "ignore-walk": { "version": "3.0.1", - "resolved": "https://registry.npmjs.org/ignore-walk/-/ignore-walk-3.0.1.tgz", - "integrity": "sha512-DTVlMx3IYPe0/JJcYP7Gxg7ttZZu3IInhuEhbchuqneY9wWe5Ojy2mXLBaQFUQmo0AW2r3qG7m1mg86js+gnlQ==", + "bundled": true, "dev": true, "optional": true, "requires": { @@ -5482,8 +5088,7 @@ }, "inflight": { "version": "1.0.6", - "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", - "integrity": "sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=", + "bundled": true, "dev": true, "optional": true, "requires": { @@ -5493,21 +5098,18 @@ }, "inherits": { "version": "2.0.3", - "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.3.tgz", - "integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=", + "bundled": true, "dev": true }, "ini": { "version": "1.3.5", - "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.5.tgz", - "integrity": "sha512-RZY5huIKCMRWDUqZlEi72f/lmXKMvuszcMBduliQ3nnWbx9X/ZBQO7DijMEYS9EhHBb2qacRUMtC7svLwe0lcw==", + "bundled": true, "dev": true, "optional": true }, "is-fullwidth-code-point": { "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-1.0.0.tgz", - "integrity": "sha1-754xOG8DGn8NZDr4L95QxFfvAMs=", + "bundled": true, "dev": true, "requires": { "number-is-nan": "^1.0.0" @@ -5515,15 +5117,13 @@ }, "isarray": { "version": "1.0.0", - "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", - "integrity": "sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE=", + "bundled": true, "dev": true, "optional": true }, "minimatch": { "version": "3.0.4", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz", - "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==", + "bundled": true, "dev": true, "requires": { "brace-expansion": "^1.1.7" @@ -5531,24 +5131,21 @@ }, "minimist": { "version": "0.0.8", - "resolved": "https://registry.npmjs.org/minimist/-/minimist-0.0.8.tgz", - "integrity": "sha1-hX/Kv8M5fSYluCKCYuhqp6ARsF0=", + "bundled": true, "dev": true }, "minipass": { - "version": "2.2.4", - "resolved": "https://registry.npmjs.org/minipass/-/minipass-2.2.4.tgz", - "integrity": "sha512-hzXIWWet/BzWhYs2b+u7dRHlruXhwdgvlTMDKC6Cb1U7ps6Ac6yQlR39xsbjWJE377YTCtKwIXIpJ5oP+j5y8g==", + "version": "2.3.5", + "bundled": true, "dev": true, "requires": { - "safe-buffer": "^5.1.1", + "safe-buffer": "^5.1.2", "yallist": "^3.0.0" } }, "minizlib": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-1.1.0.tgz", - "integrity": "sha512-4T6Ur/GctZ27nHfpt9THOdRZNgyJ9FZchYO1ceg5S8Q3DNLCKYy44nCZzgCJgcvx2UM8czmqak5BCxJMrq37lA==", + "version": "1.2.1", + "bundled": true, "dev": true, "optional": true, "requires": { @@ -5557,8 +5154,7 @@ }, "mkdirp": { "version": "0.5.1", - "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.1.tgz", - "integrity": "sha1-MAV0OOrGz3+MR2fzhkjWaX11yQM=", + "bundled": true, "dev": true, "requires": { "minimist": "0.0.8" @@ -5566,15 
+5162,13 @@ }, "ms": { "version": "2.0.0", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", - "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=", + "bundled": true, "dev": true, "optional": true }, "needle": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/needle/-/needle-2.2.0.tgz", - "integrity": "sha512-eFagy6c+TYayorXw/qtAdSvaUpEbBsDwDyxYFgLZ0lTojfH7K+OdBqAF7TAFwDokJaGpubpSGG0wO3iC0XPi8w==", + "version": "2.2.4", + "bundled": true, "dev": true, "optional": true, "requires": { @@ -5584,19 +5178,18 @@ } }, "node-pre-gyp": { - "version": "0.10.0", - "resolved": "https://registry.npmjs.org/node-pre-gyp/-/node-pre-gyp-0.10.0.tgz", - "integrity": "sha512-G7kEonQLRbcA/mOoFoxvlMrw6Q6dPf92+t/l0DFSMuSlDoWaI9JWIyPwK0jyE1bph//CUEL65/Fz1m2vJbmjQQ==", + "version": "0.10.3", + "bundled": true, "dev": true, "optional": true, "requires": { "detect-libc": "^1.0.2", "mkdirp": "^0.5.1", - "needle": "^2.2.0", + "needle": "^2.2.1", "nopt": "^4.0.1", "npm-packlist": "^1.1.6", "npmlog": "^4.0.2", - "rc": "^1.1.7", + "rc": "^1.2.7", "rimraf": "^2.6.1", "semver": "^5.3.0", "tar": "^4" @@ -5604,8 +5197,7 @@ }, "nopt": { "version": "4.0.1", - "resolved": "https://registry.npmjs.org/nopt/-/nopt-4.0.1.tgz", - "integrity": "sha1-0NRoWv1UFRk8jHUFYC0NF81kR00=", + "bundled": true, "dev": true, "optional": true, "requires": { @@ -5614,16 +5206,14 @@ } }, "npm-bundled": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/npm-bundled/-/npm-bundled-1.0.3.tgz", - "integrity": "sha512-ByQ3oJ/5ETLyglU2+8dBObvhfWXX8dtPZDMePCahptliFX2iIuhyEszyFk401PZUNQH20vvdW5MLjJxkwU80Ow==", + "version": "1.0.5", + "bundled": true, "dev": true, "optional": true }, "npm-packlist": { - "version": "1.1.10", - "resolved": "https://registry.npmjs.org/npm-packlist/-/npm-packlist-1.1.10.tgz", - "integrity": "sha512-AQC0Dyhzn4EiYEfIUjCdMl0JJ61I2ER9ukf/sLxJUcZHfo+VyEfz2rMJgLZSS1v30OxPQe1cN0LZA1xbcaVfWA==", + "version": "1.2.0", + "bundled": true, "dev": true, "optional": true, "requires": { @@ -5633,8 +5223,7 @@ }, "npmlog": { "version": "4.1.2", - "resolved": "https://registry.npmjs.org/npmlog/-/npmlog-4.1.2.tgz", - "integrity": "sha512-2uUqazuKlTaSI/dC8AzicUck7+IrEaOnN/e0jd3Xtt1KcGpwx30v50mL7oPyr/h9bL3E4aZccVwpwP+5W9Vjkg==", + "bundled": true, "dev": true, "optional": true, "requires": { @@ -5646,21 +5235,18 @@ }, "number-is-nan": { "version": "1.0.1", - "resolved": "https://registry.npmjs.org/number-is-nan/-/number-is-nan-1.0.1.tgz", - "integrity": "sha1-CXtgK1NCKlIsGvuHkDGDNpQaAR0=", + "bundled": true, "dev": true }, "object-assign": { "version": "4.1.1", - "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", - "integrity": "sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=", + "bundled": true, "dev": true, "optional": true }, "once": { "version": "1.4.0", - "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", - "integrity": "sha1-WDsap3WWHUsROsF9nFC6753Xa9E=", + "bundled": true, "dev": true, "requires": { "wrappy": "1" @@ -5668,22 +5254,19 @@ }, "os-homedir": { "version": "1.0.2", - "resolved": "https://registry.npmjs.org/os-homedir/-/os-homedir-1.0.2.tgz", - "integrity": "sha1-/7xJiDNuDoM94MFox+8VISGqf7M=", + "bundled": true, "dev": true, "optional": true }, "os-tmpdir": { "version": "1.0.2", - "resolved": "https://registry.npmjs.org/os-tmpdir/-/os-tmpdir-1.0.2.tgz", - "integrity": "sha1-u+Z0BseaqFxc/sdm/lc0VV36EnQ=", + "bundled": true, "dev": true, "optional": true }, "osenv": { "version": "0.1.5", - "resolved": "https://registry.npmjs.org/osenv/-/osenv-0.1.5.tgz", - "integrity": 
"sha512-0CWcCECdMVc2Rw3U5w9ZjqX6ga6ubk1xDVKxtBQPK7wis/0F2r9T6k4ydGYhecl7YUBxBVxhL5oisPsNxAPe2g==", + "bundled": true, "dev": true, "optional": true, "requires": { @@ -5693,26 +5276,23 @@ }, "path-is-absolute": { "version": "1.0.1", - "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", - "integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=", + "bundled": true, "dev": true, "optional": true }, "process-nextick-args": { "version": "2.0.0", - "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.0.tgz", - "integrity": "sha512-MtEC1TqN0EU5nephaJ4rAtThHtC86dNN9qCuEhtshvpVBkAW5ZO7BASN9REnF9eoXGcRub+pFuKEpOHE+HbEMw==", + "bundled": true, "dev": true, "optional": true }, "rc": { - "version": "1.2.7", - "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.7.tgz", - "integrity": "sha512-LdLD8xD4zzLsAT5xyushXDNscEjB7+2ulnl8+r1pnESlYtlJtVSoCMBGr30eDRJ3+2Gq89jK9P9e4tCEH1+ywA==", + "version": "1.2.8", + "bundled": true, "dev": true, "optional": true, "requires": { - "deep-extend": "^0.5.1", + "deep-extend": "^0.6.0", "ini": "~1.3.0", "minimist": "^1.2.0", "strip-json-comments": "~2.0.1" @@ -5720,8 +5300,7 @@ "dependencies": { "minimist": { "version": "1.2.0", - "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz", - "integrity": "sha1-o1AIsg9BOD7sH7kU9M1d95omQoQ=", + "bundled": true, "dev": true, "optional": true } @@ -5729,8 +5308,7 @@ }, "readable-stream": { "version": "2.3.6", - "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.6.tgz", - "integrity": "sha512-tQtKA9WIAhBF3+VLAseyMqZeBjW0AHJoxOtYqSUZNJxauErmLbVm2FW1y+J/YA9dUrAC39ITejlZWhVIwawkKw==", + "bundled": true, "dev": true, "optional": true, "requires": { @@ -5744,60 +5322,52 @@ } }, "rimraf": { - "version": "2.6.2", - "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-2.6.2.tgz", - "integrity": "sha512-lreewLK/BlghmxtfH36YYVg1i8IAce4TI7oao75I1g245+6BctqTVQiBP3YUJ9C6DQOXJmkYR9X9fCLtCOJc5w==", + "version": "2.6.3", + "bundled": true, "dev": true, "optional": true, "requires": { - "glob": "^7.0.5" + "glob": "^7.1.3" } }, "safe-buffer": { - "version": "5.1.1", - "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.1.tgz", - "integrity": "sha512-kKvNJn6Mm93gAczWVJg7wH+wGYWNrDHdWvpUmHyEsgCtIwwo3bqPtV4tR5tuPaUhTOo/kvhVwd8XwwOllGYkbg==", + "version": "5.1.2", + "bundled": true, "dev": true }, "safer-buffer": { "version": "2.1.2", - "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", - "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", + "bundled": true, "dev": true, "optional": true }, "sax": { "version": "1.2.4", - "resolved": "https://registry.npmjs.org/sax/-/sax-1.2.4.tgz", - "integrity": "sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw==", + "bundled": true, "dev": true, "optional": true }, "semver": { - "version": "5.5.0", - "resolved": "https://registry.npmjs.org/semver/-/semver-5.5.0.tgz", - "integrity": "sha512-4SJ3dm0WAwWy/NVeioZh5AntkdJoWKxHxcmyP622fOkgHa4z3R0TdBJICINyaSDE6uNwVc8gZr+ZinwZAH4xIA==", + "version": "5.6.0", + "bundled": true, "dev": true, "optional": true }, "set-blocking": { "version": "2.0.0", - "resolved": "https://registry.npmjs.org/set-blocking/-/set-blocking-2.0.0.tgz", - "integrity": "sha1-BF+XgtARrppoA93TgrJDkrPYkPc=", + "bundled": true, "dev": true, "optional": true }, "signal-exit": { "version": "3.0.2", - "resolved": 
"https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.2.tgz", - "integrity": "sha1-tf3AjxKH6hF4Yo5BXiUTK3NkbG0=", + "bundled": true, "dev": true, "optional": true }, "string-width": { "version": "1.0.2", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-1.0.2.tgz", - "integrity": "sha1-EYvfW4zcUaKn5w0hHgfisLmxB9M=", + "bundled": true, "dev": true, "requires": { "code-point-at": "^1.0.0", @@ -5807,8 +5377,7 @@ }, "string_decoder": { "version": "1.1.1", - "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", - "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", + "bundled": true, "dev": true, "optional": true, "requires": { @@ -5817,8 +5386,7 @@ }, "strip-ansi": { "version": "3.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-3.0.1.tgz", - "integrity": "sha1-ajhfuIU9lS1f8F0Oiq+UJ43GPc8=", + "bundled": true, "dev": true, "requires": { "ansi-regex": "^2.0.0" @@ -5826,54 +5394,48 @@ }, "strip-json-comments": { "version": "2.0.1", - "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz", - "integrity": "sha1-PFMZQukIwml8DsNEhYwobHygpgo=", + "bundled": true, "dev": true, "optional": true }, "tar": { - "version": "4.4.1", - "resolved": "https://registry.npmjs.org/tar/-/tar-4.4.1.tgz", - "integrity": "sha512-O+v1r9yN4tOsvl90p5HAP4AEqbYhx4036AGMm075fH9F8Qwi3oJ+v4u50FkT/KkvywNGtwkk0zRI+8eYm1X/xg==", + "version": "4.4.8", + "bundled": true, "dev": true, "optional": true, "requires": { - "chownr": "^1.0.1", + "chownr": "^1.1.1", "fs-minipass": "^1.2.5", - "minipass": "^2.2.4", - "minizlib": "^1.1.0", + "minipass": "^2.3.4", + "minizlib": "^1.1.1", "mkdirp": "^0.5.0", - "safe-buffer": "^5.1.1", + "safe-buffer": "^5.1.2", "yallist": "^3.0.2" } }, "util-deprecate": { "version": "1.0.2", - "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", - "integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8=", + "bundled": true, "dev": true, "optional": true }, "wide-align": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/wide-align/-/wide-align-1.1.2.tgz", - "integrity": "sha512-ijDLlyQ7s6x1JgCLur53osjm/UXUYD9+0PbYKrBsYisYXzCxN+HC3mYDNy/dWdmf3AwqwU3CXwDCvsNgGK1S0w==", + "version": "1.1.3", + "bundled": true, "dev": true, "optional": true, "requires": { - "string-width": "^1.0.2" + "string-width": "^1.0.2 || 2" } }, "wrappy": { "version": "1.0.2", - "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", - "integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=", + "bundled": true, "dev": true }, "yallist": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.0.2.tgz", - "integrity": "sha1-hFK0u36Dx8GI2AQcGoN8dz1ti7k=", + "version": "3.0.3", + "bundled": true, "dev": true } } @@ -5913,23 +5475,33 @@ } }, "genfun": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/genfun/-/genfun-4.0.1.tgz", - "integrity": "sha1-7RAEHy5KfxsKOEZtF6XD4n3x38E=", + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/genfun/-/genfun-5.0.0.tgz", + "integrity": "sha512-KGDOARWVga7+rnB3z9Sd2Letx515owfk0hSxHGuqjANb1M+x2bGZGqHLiozPsYMdM2OubeMni/Hpwmjq6qIUhA==", "dev": true }, + "gentle-fs": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/gentle-fs/-/gentle-fs-2.0.1.tgz", + "integrity": "sha512-cEng5+3fuARewXktTEGbwsktcldA+YsnUEaXZwcK/3pjSE1X9ObnTs+/8rYf8s+RnIcQm2D5x3rwpN7Zom8Bew==", + "dev": true, + "requires": { + "aproba": "^1.1.2", + "fs-vacuum": 
"^1.2.10", + "graceful-fs": "^4.1.11", + "iferr": "^0.1.5", + "mkdirp": "^0.5.1", + "path-is-inside": "^1.0.2", + "read-cmd-shim": "^1.0.1", + "slide": "^1.1.6" + } + }, "get-caller-file": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-1.0.3.tgz", "integrity": "sha512-3t6rVToeoZfYSGd8YoLFR2DJkiQrIiUrGcjvFX2mDw3bn6k2OtwHN0TNCLbBO+w8qTvimhDkv+LSscbJY1vE6w==", "dev": true }, - "get-own-enumerable-property-symbols": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/get-own-enumerable-property-symbols/-/get-own-enumerable-property-symbols-3.0.0.tgz", - "integrity": "sha512-CIJYJC4GGF06TakLg8z4GQKvDsx9EMspVxOYih7LerEL/WosUnFIww45CGfxfeKHqlg3twgUrYRT1O3WQqjGCg==", - "dev": true - }, "get-pkg-repo": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/get-pkg-repo/-/get-pkg-repo-1.4.0.tgz", @@ -5943,9 +5515,15 @@ "through2": "^2.0.0" }, "dependencies": { + "camelcase": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-2.1.1.tgz", + "integrity": "sha1-fB0W1nmhu+WcoCys7PsBHiAfWh8=", + "dev": true + }, "camelcase-keys": { "version": "2.1.0", - "resolved": "http://registry.npmjs.org/camelcase-keys/-/camelcase-keys-2.1.0.tgz", + "resolved": "https://registry.npmjs.org/camelcase-keys/-/camelcase-keys-2.1.0.tgz", "integrity": "sha1-MIvur/3ygRkFHvodkyITyRuPkuc=", "dev": true, "requires": { @@ -5970,7 +5548,7 @@ }, "meow": { "version": "3.7.0", - "resolved": "http://registry.npmjs.org/meow/-/meow-3.7.0.tgz", + "resolved": "https://registry.npmjs.org/meow/-/meow-3.7.0.tgz", "integrity": "sha1-cstmi0JSKCkKu/qFaJJYcwioAfs=", "dev": true, "requires": { @@ -6027,7 +5605,7 @@ }, "get-stream": { "version": "3.0.0", - "resolved": "http://registry.npmjs.org/get-stream/-/get-stream-3.0.0.tgz", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-3.0.0.tgz", "integrity": "sha1-jpQ9E1jcN1VQVOy+LtsFqhdO3hQ=", "dev": true }, @@ -6057,27 +5635,6 @@ "meow": "^4.0.0", "split2": "^2.0.0", "through2": "^2.0.0" - }, - "dependencies": { - "lodash.template": { - "version": "4.4.0", - "resolved": "https://registry.npmjs.org/lodash.template/-/lodash.template-4.4.0.tgz", - "integrity": "sha1-5zoDhcg1VZF0bgILmWecaQ5o+6A=", - "dev": true, - "requires": { - "lodash._reinterpolate": "~3.0.0", - "lodash.templatesettings": "^4.0.0" - } - }, - "lodash.templatesettings": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/lodash.templatesettings/-/lodash.templatesettings-4.1.0.tgz", - "integrity": "sha1-K01OlbpEDZFf8IvImeRVNmZxMxY=", - "dev": true, - "requires": { - "lodash._reinterpolate": "~3.0.0" - } - } } }, "git-remote-origin-url": { @@ -6099,9 +5656,9 @@ } }, "git-semver-tags": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/git-semver-tags/-/git-semver-tags-2.0.0.tgz", - "integrity": "sha512-lSgFc3zQTul31nFje2Q8XdNcTOI6B4I3mJRPCgFzHQQLfxfqdWTYzdtCaynkK5Xmb2wQlSJoKolhXJ1VhKROnQ==", + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/git-semver-tags/-/git-semver-tags-2.0.2.tgz", + "integrity": "sha512-34lMF7Yo1xEmsK2EkbArdoU79umpvm0MfzaDkSNYSJqtM5QLAVTPWgpiXSVI5o/O9EvZPSrP4Zvnec/CqhSd5w==", "dev": true, "requires": { "meow": "^4.0.0", @@ -6139,15 +5696,53 @@ "requires": { "glob-parent": "^2.0.0", "is-glob": "^2.0.0" + }, + "dependencies": { + "glob-parent": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-2.0.0.tgz", + "integrity": "sha1-gTg9ctsFT8zPUzbaqQLxgvbtuyg=", + "dev": true, + "requires": { + "is-glob": "^2.0.0" + } + 
}, + "is-extglob": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-1.0.0.tgz", + "integrity": "sha1-rEaBd8SUNAWgkvyPKXYMb/xiBsA=", + "dev": true + }, + "is-glob": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-2.0.1.tgz", + "integrity": "sha1-0Jb5JqPe1WAPP9/ZEZjLCIjC2GM=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + } } }, "glob-parent": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-2.0.0.tgz", - "integrity": "sha1-gTg9ctsFT8zPUzbaqQLxgvbtuyg=", + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-3.1.0.tgz", + "integrity": "sha1-nmr2KZ2NO9K9QEMIMr0RPfkGxa4=", "dev": true, "requires": { - "is-glob": "^2.0.0" + "is-glob": "^3.1.0", + "path-dirname": "^1.0.0" + }, + "dependencies": { + "is-glob": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-3.1.0.tgz", + "integrity": "sha1-e6WuJCF4BKxwcHuWkiVnSGzD6Eo=", + "dev": true, + "requires": { + "is-extglob": "^2.1.0" + } + } } }, "glob-stream": { @@ -6166,33 +5761,6 @@ "remove-trailing-separator": "^1.0.1", "to-absolute-glob": "^2.0.0", "unique-stream": "^2.0.2" - }, - "dependencies": { - "glob-parent": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-3.1.0.tgz", - "integrity": "sha1-nmr2KZ2NO9K9QEMIMr0RPfkGxa4=", - "dev": true, - "requires": { - "is-glob": "^3.1.0", - "path-dirname": "^1.0.0" - } - }, - "is-extglob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", - "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", - "dev": true - }, - "is-glob": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-3.1.0.tgz", - "integrity": "sha1-e6WuJCF4BKxwcHuWkiVnSGzD6Eo=", - "dev": true, - "requires": { - "is-extglob": "^2.1.0" - } - } } }, "glob-to-regexp": { @@ -6202,438 +5770,90 @@ "dev": true }, "glob-watcher": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/glob-watcher/-/glob-watcher-5.0.1.tgz", - "integrity": "sha512-fK92r2COMC199WCyGUblrZKhjra3cyVMDiypDdqg1vsSDmexnbYivK1kNR4QItiNXLKmGlqan469ks67RtNa2g==", + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/glob-watcher/-/glob-watcher-5.0.3.tgz", + "integrity": "sha512-8tWsULNEPHKQ2MR4zXuzSmqbdyV5PtwwCaWSGQ1WwHsJ07ilNeN1JB8ntxhckbnpSHaf9dXFUHzIWvm1I13dsg==", "dev": true, "requires": { + "anymatch": "^2.0.0", "async-done": "^1.2.0", "chokidar": "^2.0.0", + "is-negated-glob": "^1.0.0", "just-debounce": "^1.0.0", "object.defaults": "^1.1.0" + } + }, + "global-modules": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/global-modules/-/global-modules-1.0.0.tgz", + "integrity": "sha512-sKzpEkf11GpOFuw0Zzjzmt4B4UZwjOcG757PPvrfhxcLFbq0wpsgpOqxpxtxFiCG4DtG93M6XRVbF2oGdev7bg==", + "dev": true, + "requires": { + "global-prefix": "^1.0.1", + "is-windows": "^1.0.1", + "resolve-dir": "^1.0.0" + } + }, + "global-prefix": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/global-prefix/-/global-prefix-1.0.2.tgz", + "integrity": "sha1-2/dDxsFJklk8ZVVoy2btMsASLr4=", + "dev": true, + "requires": { + "expand-tilde": "^2.0.2", + "homedir-polyfill": "^1.0.1", + "ini": "^1.3.4", + "is-windows": "^1.0.1", + "which": "^1.2.14" + } + }, + "globals": { + "version": "9.18.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-9.18.0.tgz", + "integrity": 
"sha512-S0nG3CLEQiY/ILxqtztTWH/3iRRdyBLw6KMDxnKMchrtbj2OFmehVh0WUCfW3DUrIgx/qFrJPICrq4Z4sTR9UQ==", + "dev": true + }, + "globby": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/globby/-/globby-6.1.0.tgz", + "integrity": "sha1-9abXDoOV4hyFj7BInWTfAkJNUGw=", + "dev": true, + "requires": { + "array-union": "^1.0.1", + "glob": "^7.0.3", + "object-assign": "^4.0.1", + "pify": "^2.0.0", + "pinkie-promise": "^2.0.0" }, "dependencies": { - "anymatch": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-2.0.0.tgz", - "integrity": "sha512-5teOsQWABXHHBFP9y3skS5P3d/WfWXpv3FUpy+LorMrNYaT9pI4oLMQX7jzQ2KklNpGpWHzdCXTDT2Y3XGlZBw==", - "dev": true, - "requires": { - "micromatch": "^3.1.4", - "normalize-path": "^2.1.1" - } - }, - "arr-diff": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz", - "integrity": "sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=", - "dev": true - }, - "array-unique": { - "version": "0.3.2", - "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.3.2.tgz", - "integrity": "sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg=", - "dev": true - }, - "braces": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-2.3.2.tgz", - "integrity": "sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w==", - "dev": true, - "requires": { - "arr-flatten": "^1.1.0", - "array-unique": "^0.3.2", - "extend-shallow": "^2.0.1", - "fill-range": "^4.0.0", - "isobject": "^3.0.1", - "repeat-element": "^1.1.2", - "snapdragon": "^0.8.1", - "snapdragon-node": "^2.0.1", - "split-string": "^3.0.2", - "to-regex": "^3.0.1" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "chokidar": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-2.0.4.tgz", - "integrity": "sha512-z9n7yt9rOvIJrMhvDtDictKrkFHeihkNl6uWMmZlmL6tJtX9Cs+87oK+teBx+JIgzvbX3yZHT3eF8vpbDxHJXQ==", - "dev": true, - "requires": { - "anymatch": "^2.0.0", - "async-each": "^1.0.0", - "braces": "^2.3.0", - "fsevents": "^1.2.2", - "glob-parent": "^3.1.0", - "inherits": "^2.0.1", - "is-binary-path": "^1.0.0", - "is-glob": "^4.0.0", - "lodash.debounce": "^4.0.8", - "normalize-path": "^2.1.1", - "path-is-absolute": "^1.0.0", - "readdirp": "^2.0.0", - "upath": "^1.0.5" - } - }, - "expand-brackets": { - "version": "2.1.4", - "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-2.1.4.tgz", - "integrity": "sha1-t3c14xXOMPa27/D4OwQVGiJEliI=", - "dev": true, - "requires": { - "debug": "^2.3.3", - "define-property": "^0.2.5", - "extend-shallow": "^2.0.1", - "posix-character-classes": "^0.1.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "0.2.5", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-0.2.5.tgz", - "integrity": "sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=", - "dev": true, - "requires": { - "is-descriptor": "^0.1.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - }, - "is-accessor-descriptor": { - "version": "0.1.6", - 
"resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-0.1.6.tgz", - "integrity": "sha1-qeEss66Nh2cn7u84Q/igiXtcmNY=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-data-descriptor": { - "version": "0.1.4", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz", - "integrity": "sha1-C17mSDiOLIYCgueT8YVv7D8wG1Y=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-0.1.6.tgz", - "integrity": "sha512-avDYr0SB3DwO9zsMov0gKCESFYqCnE4hq/4z3TdUlukEy5t9C0YRq7HLrsN52NAcqXKaepeCD0n+B0arnVG3Hg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^0.1.6", - "is-data-descriptor": "^0.1.4", - "kind-of": "^5.0.0" - } - }, - "kind-of": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-5.1.0.tgz", - "integrity": "sha512-NGEErnH6F2vUuXDh+OlbcKW7/wOcfdRHaZ7VWtqCztfHri/++YKmP51OdWeGPuqCOba6kk2OTe5d02VmTB80Pw==", - "dev": true - } - } - }, - "extglob": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/extglob/-/extglob-2.0.4.tgz", - "integrity": "sha512-Nmb6QXkELsuBr24CJSkilo6UHHgbekK5UiZgfE6UHD3Eb27YC6oD+bhcT+tJ6cl8dmsgdQxnWlcry8ksBIBLpw==", - "dev": true, - "requires": { - "array-unique": "^0.3.2", - "define-property": "^1.0.0", - "expand-brackets": "^2.1.4", - "extend-shallow": "^2.0.1", - "fragment-cache": "^0.2.1", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-1.0.0.tgz", - "integrity": "sha1-dp66rz9KY6rTr56NMEybvnm/sOY=", - "dev": true, - "requires": { - "is-descriptor": "^1.0.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "fill-range": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-4.0.0.tgz", - "integrity": "sha1-1USBHUKPmOsGpj3EAtJAPDKMOPc=", - "dev": true, - "requires": { - "extend-shallow": "^2.0.1", - "is-number": "^3.0.0", - "repeat-string": "^1.6.1", - "to-regex-range": "^2.1.0" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "glob-parent": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-3.1.0.tgz", - "integrity": "sha1-nmr2KZ2NO9K9QEMIMr0RPfkGxa4=", - "dev": true, - "requires": { - "is-glob": "^3.1.0", - "path-dirname": "^1.0.0" - }, - "dependencies": { - "is-glob": { - "version": "3.1.0", - "resolved": 
"https://registry.npmjs.org/is-glob/-/is-glob-3.1.0.tgz", - "integrity": "sha1-e6WuJCF4BKxwcHuWkiVnSGzD6Eo=", - "dev": true, - "requires": { - "is-extglob": "^2.1.0" - } - } - } - }, - "is-accessor-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-1.0.0.tgz", - "integrity": "sha512-m5hnHTkcVsPfqx3AKlyttIPb7J+XykHvJP2B9bZDjlhLIoEq4XoK64Vg7boZlVWYK6LUY94dYPEE7Lh0ZkZKcQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } - }, - "is-data-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-1.0.0.tgz", - "integrity": "sha512-jbRXy1FmtAoCjQkVmIVYwuuqDFUbaOeDjmed1tOGPrsMhtJA4rD9tkgA0F1qJ3gRFRXcHYVkdeaP50Q5rE/jLQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } - }, - "is-descriptor": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-1.0.2.tgz", - "integrity": "sha512-2eis5WqQGV7peooDyLmNEPUrps9+SXX5c9pL3xEB+4e9HnGuDa7mB7kHxHw4CbqS9k1T2hOH3miL8n8WtiYVtg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^1.0.0", - "is-data-descriptor": "^1.0.0", - "kind-of": "^6.0.2" - } - }, - "is-extglob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", - "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", - "dev": true - }, - "is-glob": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.0.tgz", - "integrity": "sha1-lSHHaEXMJhCoUgPd8ICpWML/q8A=", - "dev": true, - "requires": { - "is-extglob": "^2.1.1" - } - }, - "is-number": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-3.0.0.tgz", - "integrity": "sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true - }, - "micromatch": { - "version": "3.1.10", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-3.1.10.tgz", - "integrity": "sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg==", - "dev": true, - "requires": { - "arr-diff": "^4.0.0", - "array-unique": "^0.3.2", - "braces": "^2.3.1", - "define-property": "^2.0.2", - "extend-shallow": "^3.0.2", - "extglob": "^2.0.4", - "fragment-cache": "^0.2.1", - "kind-of": "^6.0.2", - "nanomatch": "^1.2.9", - "object.pick": "^1.3.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.2" - } - } - } - }, - "glob2base": { - "version": "0.0.12", - "resolved": "https://registry.npmjs.org/glob2base/-/glob2base-0.0.12.tgz", - "integrity": "sha1-nUGbPijxLoOjYhZKJ3BVkiycDVY=", - "dev": true, - "requires": { - "find-index": "^0.1.1" - } - }, - "global-modules": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/global-modules/-/global-modules-1.0.0.tgz", - "integrity": 
"sha512-sKzpEkf11GpOFuw0Zzjzmt4B4UZwjOcG757PPvrfhxcLFbq0wpsgpOqxpxtxFiCG4DtG93M6XRVbF2oGdev7bg==", - "dev": true, - "requires": { - "global-prefix": "^1.0.1", - "is-windows": "^1.0.1", - "resolve-dir": "^1.0.0" - } - }, - "global-prefix": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/global-prefix/-/global-prefix-1.0.2.tgz", - "integrity": "sha1-2/dDxsFJklk8ZVVoy2btMsASLr4=", - "dev": true, - "requires": { - "expand-tilde": "^2.0.2", - "homedir-polyfill": "^1.0.1", - "ini": "^1.3.4", - "is-windows": "^1.0.1", - "which": "^1.2.14" - } - }, - "globals": { - "version": "9.18.0", - "resolved": "https://registry.npmjs.org/globals/-/globals-9.18.0.tgz", - "integrity": "sha512-S0nG3CLEQiY/ILxqtztTWH/3iRRdyBLw6KMDxnKMchrtbj2OFmehVh0WUCfW3DUrIgx/qFrJPICrq4Z4sTR9UQ==", - "dev": true - }, - "globby": { - "version": "6.1.0", - "resolved": "https://registry.npmjs.org/globby/-/globby-6.1.0.tgz", - "integrity": "sha1-9abXDoOV4hyFj7BInWTfAkJNUGw=", - "dev": true, - "requires": { - "array-union": "^1.0.1", - "glob": "^7.0.3", - "object-assign": "^4.0.1", - "pify": "^2.0.0", - "pinkie-promise": "^2.0.0" - }, - "dependencies": { - "pify": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/pify/-/pify-2.3.0.tgz", - "integrity": "sha1-7RQaasBDqEnqWISY59yosVMw6Qw=", + "pify": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/pify/-/pify-2.3.0.tgz", + "integrity": "sha1-7RQaasBDqEnqWISY59yosVMw6Qw=", "dev": true } } }, "glogg": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/glogg/-/glogg-1.0.1.tgz", - "integrity": "sha512-ynYqXLoluBKf9XGR1gA59yEJisIL7YHEH4xr3ZziHB5/yl4qWfaK8Js9jGe6gBGCSCKVqiyO30WnRZADvemUNw==", + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/glogg/-/glogg-1.0.2.tgz", + "integrity": "sha512-5mwUoSuBk44Y4EshyiqcH95ZntbDdTQqA3QYSrxmzj28Ai0vXBGMH1ApSANH14j2sIRtqCEyg6PfsuP7ElOEDA==", "dev": true, "requires": { "sparkles": "^1.0.0" } }, "google-closure-compiler": { - "version": "20181008.0.0", - "resolved": "https://registry.npmjs.org/google-closure-compiler/-/google-closure-compiler-20181008.0.0.tgz", - "integrity": "sha512-XmJIasXHyy4kirthlsuDev2LZcXjYXWfOHwHdCLUQnfJH8T2sxWDNjFLQycaCIXwQLOyw2Kem38VgxrYfG0hzg==", + "version": "20190121.0.0", + "resolved": "https://registry.npmjs.org/google-closure-compiler/-/google-closure-compiler-20190121.0.0.tgz", + "integrity": "sha512-FIp3+KxjtDwykDTr1WsFo0QexEopAC4bDXXZfnEdgHECF7hCeFAAsLUPxMmj9Wx+O39eFCXGAzY7w0k5aU9qjg==", "dev": true, "requires": { "chalk": "^1.0.0", - "google-closure-compiler-linux": "^20181008.0.0", - "google-closure-compiler-osx": "^20181008.0.0", + "google-closure-compiler-java": "^20190121.0.0", + "google-closure-compiler-js": "^20190121.0.0", + "google-closure-compiler-linux": "^20190121.0.0", + "google-closure-compiler-osx": "^20190121.0.0", "minimist": "^1.2.0", "vinyl": "^2.0.1", "vinyl-sourcemaps-apply": "^0.2.0" @@ -6647,7 +5867,7 @@ }, "chalk": { "version": "1.1.3", - "resolved": "http://registry.npmjs.org/chalk/-/chalk-1.1.3.tgz", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-1.1.3.tgz", "integrity": "sha1-qBFcVeSnAv5NFQq9OHKCKn4J/Jg=", "dev": true, "requires": { @@ -6666,24 +5886,36 @@ } } }, + "google-closure-compiler-java": { + "version": "20190121.0.0", + "resolved": "https://registry.npmjs.org/google-closure-compiler-java/-/google-closure-compiler-java-20190121.0.0.tgz", + "integrity": "sha512-UCQ7ZXOlk/g101DS4TqyW+SaoR+4GVq7NKrwebH4gnESY76Xuz7FRrKWwfAXwltmiYAUVZCVI4qpoEz48V+VjA==", + "dev": true + }, + 
"google-closure-compiler-js": { + "version": "20190121.0.0", + "resolved": "https://registry.npmjs.org/google-closure-compiler-js/-/google-closure-compiler-js-20190121.0.0.tgz", + "integrity": "sha512-PgY0Fy+fXZnjir6aPz/FVJPXuwZf5pKJ9n7Hf1HL4x1lhqVIf3i+u3Ed6ZWCXa+YiEhvwH5RTQr/iPP/D3gDRg==", + "dev": true + }, "google-closure-compiler-linux": { - "version": "20181008.0.0", - "resolved": "https://registry.npmjs.org/google-closure-compiler-linux/-/google-closure-compiler-linux-20181008.0.0.tgz", - "integrity": "sha512-k8njGfH2uzWJiRPPvUxM7MJB28gPrf4kI2bbuiF0gJk/1arXcWCPGjLD6pzCU0UylMy52MUXLgsIpRorqf2brw==", + "version": "20190121.0.0", + "resolved": "https://registry.npmjs.org/google-closure-compiler-linux/-/google-closure-compiler-linux-20190121.0.0.tgz", + "integrity": "sha512-cw4qr9TuB2gB53l/oYadZLuw+zOi2yggYFtnNA5jvTLTqY8m2VZAL5DGL6gmCtZovbQ0bv9ANqjT8NxEtcSzfw==", "dev": true, "optional": true }, "google-closure-compiler-osx": { - "version": "20181008.0.0", - "resolved": "https://registry.npmjs.org/google-closure-compiler-osx/-/google-closure-compiler-osx-20181008.0.0.tgz", - "integrity": "sha512-xzf/yH/4MXdb6GbP84iHnpcVCOPBbH0gMVOs0JhR/KbrQh+DlJU+Y8Z/DQzTkw9HgD650R2/WZmBknURyg9OTw==", + "version": "20190121.0.0", + "resolved": "https://registry.npmjs.org/google-closure-compiler-osx/-/google-closure-compiler-osx-20190121.0.0.tgz", + "integrity": "sha512-6OqyUcgojPCqCuzdyKLwmIkBhfoWF3cVzaX8vaJvQ3SYwlITBT3aepMEZiWFRVvvml+ojs1AJcZvQIqFke8X1w==", "dev": true, "optional": true }, "graceful-fs": { - "version": "4.1.11", - "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.1.11.tgz", - "integrity": "sha1-Dovf5NHduIVNZOBOp8AOKgJuVlg=", + "version": "4.1.15", + "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.1.15.tgz", + "integrity": "sha512-6uHUhOPEBgQ24HM+r6b/QwWfZq+yiFcipKFrOFiBEnWdy5sdzYoi+pJeQaPI5qOLRFqWmAXUPQNsielzdLoecA==", "dev": true }, "growl": { @@ -6710,15 +5942,6 @@ "vinyl-fs": "^3.0.0" }, "dependencies": { - "ansi-colors": { - "version": "1.1.0", - "resolved": "http://registry.npmjs.org/ansi-colors/-/ansi-colors-1.1.0.tgz", - "integrity": "sha512-SFKX67auSNoVR38N3L+nvsPjOE0bybKTYbkf5tRvushrAPQ9V75huw0ZxBkKVeRU9kqH3d6HA4xTckbwZ4ixmA==", - "dev": true, - "requires": { - "ansi-wrap": "^0.1.0" - } - }, "gulp-cli": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/gulp-cli/-/gulp-cli-2.0.1.tgz", @@ -6744,24 +5967,21 @@ "v8flags": "^3.0.1", "yargs": "^7.1.0" } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true } } }, "gulp-json-transform": { - "version": "0.4.5", - "resolved": "https://registry.npmjs.org/gulp-json-transform/-/gulp-json-transform-0.4.5.tgz", - "integrity": "sha512-kaGUaAhgjxeLgIMNF3IPFFmYCF6AgvzBQwqmVowiIStNADZSoILtPNDisYA4mKfpwMTqSiWLogQt1q5U75+uwA==", + "version": "0.4.6", + "resolved": "https://registry.npmjs.org/gulp-json-transform/-/gulp-json-transform-0.4.6.tgz", + "integrity": "sha512-laPoNiJP/+lAeiyb0lgY3cynOOi7R/QbPvKBEXJY6bm836nYg90pwY4mgwR7w8nFDlXiCToUeaoQCBIc2NudjA==", "dev": true, "requires": { - "gulp-util": "^3.0.8", + "ansi-colors": "^1.0.1", + "fancy-log": "^1.3.2", + "plugin-error": "^1.0.1", "promise": "^8.0.1", - "through2": "^2.0.3" + "through2": "^2.0.3", + "vinyl": "^2.1.0" } }, "gulp-rename": { @@ -6798,212 +6018,47 @@ } }, "gulp-typescript": { - "version": "5.0.0-alpha.3", - "resolved": "https://registry.npmjs.org/gulp-typescript/-/gulp-typescript-5.0.0-alpha.3.tgz", - 
"integrity": "sha512-6iSBjqBXAUqRsLUh/9XtlOnSzpPMbLrr5rqGj4UPLtGpDwFHW/fVTuRgv6LAWiKesLIUDDM0ourxvcpu2trecQ==", + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/gulp-typescript/-/gulp-typescript-5.0.0.tgz", + "integrity": "sha512-lMj2U+Ni6HyFaY2nr1sSQ6D014eHil5L1i52XWBaAQUR9UAUUp9btnm4yRBT2Jb8xhrwqmhMssZf/g2B7cinCA==", "dev": true, "requires": { - "ansi-colors": "^2.0.2", + "ansi-colors": "^3.0.5", "plugin-error": "^1.0.1", "source-map": "^0.7.3", - "through2": "^2.0.3", + "through2": "^3.0.0", "vinyl": "^2.1.0", "vinyl-fs": "^3.0.3" }, "dependencies": { - "glob-parent": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-3.1.0.tgz", - "integrity": "sha1-nmr2KZ2NO9K9QEMIMr0RPfkGxa4=", - "dev": true, - "requires": { - "is-glob": "^3.1.0", - "path-dirname": "^1.0.0" - } - }, - "glob-stream": { - "version": "6.1.0", - "resolved": "https://registry.npmjs.org/glob-stream/-/glob-stream-6.1.0.tgz", - "integrity": "sha1-cEXJlBOz65SIjYOrRtC0BMx73eQ=", - "dev": true, - "requires": { - "extend": "^3.0.0", - "glob": "^7.1.1", - "glob-parent": "^3.1.0", - "is-negated-glob": "^1.0.0", - "ordered-read-streams": "^1.0.0", - "pumpify": "^1.3.5", - "readable-stream": "^2.1.5", - "remove-trailing-separator": "^1.0.1", - "to-absolute-glob": "^2.0.0", - "unique-stream": "^2.0.2" - } - }, - "is-extglob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", - "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", - "dev": true - }, - "is-glob": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-3.1.0.tgz", - "integrity": "sha1-e6WuJCF4BKxwcHuWkiVnSGzD6Eo=", - "dev": true, - "requires": { - "is-extglob": "^2.1.0" - } - }, - "is-valid-glob": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-valid-glob/-/is-valid-glob-1.0.0.tgz", - "integrity": "sha1-Kb8+/3Ab4tTTFdusw5vDn+j2Aao=", + "ansi-colors": { + "version": "3.2.3", + "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-3.2.3.tgz", + "integrity": "sha512-LEHHyuhlPY3TmuUYMh2oz89lTShfvgbmzaBcxve9t/9Wuy7Dwf4yoAKcND7KFT1HAQfqZ12qtc+DUrBMeKF9nw==", "dev": true }, - "ordered-read-streams": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/ordered-read-streams/-/ordered-read-streams-1.0.1.tgz", - "integrity": "sha1-d8DLN8QVJdZBZtmQ/61+xqDhNj4=", - "dev": true, - "requires": { - "readable-stream": "^2.0.1" - } - }, "source-map": { "version": "0.7.3", "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.7.3.tgz", "integrity": "sha512-CkCj6giN3S+n9qrYiBTX5gystlENnRW5jZeNLHpe6aue+SrHcG5VYwujhW9s4dY31mEGsxBDrHR6oI69fTXsaQ==", "dev": true }, - "to-absolute-glob": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/to-absolute-glob/-/to-absolute-glob-2.0.2.tgz", - "integrity": "sha1-GGX0PZ50sIItufFFt4z/fQ98hJs=", - "dev": true, - "requires": { - "is-absolute": "^1.0.0", - "is-negated-glob": "^1.0.0" - } - }, - "vinyl-fs": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/vinyl-fs/-/vinyl-fs-3.0.3.tgz", - "integrity": "sha512-vIu34EkyNyJxmP0jscNzWBSygh7VWhqun6RmqVfXePrOwi9lhvRs//dOaGOTRUQr4tx7/zd26Tk5WeSVZitgng==", + "through2": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/through2/-/through2-3.0.0.tgz", + "integrity": "sha512-8B+sevlqP4OiCjonI1Zw03Sf8PuV1eRsYQgLad5eonILOdyeRsY27A/2Ze8IlvlMvq31OH+3fz/styI7Ya62yQ==", "dev": true, "requires": { - "fs-mkdirp-stream": "^1.0.0", - "glob-stream": "^6.1.0", - "graceful-fs": "^4.0.0", - 
"is-valid-glob": "^1.0.0", - "lazystream": "^1.0.0", - "lead": "^1.0.0", - "object.assign": "^4.0.4", - "pumpify": "^1.3.5", - "readable-stream": "^2.3.3", - "remove-bom-buffer": "^3.0.0", - "remove-bom-stream": "^1.2.0", - "resolve-options": "^1.1.0", - "through2": "^2.0.0", - "to-through": "^2.0.0", - "value-or-function": "^3.0.0", - "vinyl": "^2.0.0", - "vinyl-sourcemap": "^1.1.0" + "readable-stream": "2 || 3", + "xtend": "~4.0.1" } } } }, - "gulp-util": { - "version": "3.0.8", - "resolved": "https://registry.npmjs.org/gulp-util/-/gulp-util-3.0.8.tgz", - "integrity": "sha1-AFTh50RQLifATBh8PsxQXdVLu08=", - "dev": true, - "requires": { - "array-differ": "^1.0.0", - "array-uniq": "^1.0.2", - "beeper": "^1.0.0", - "chalk": "^1.0.0", - "dateformat": "^2.0.0", - "fancy-log": "^1.1.0", - "gulplog": "^1.0.0", - "has-gulplog": "^0.1.0", - "lodash._reescape": "^3.0.0", - "lodash._reevaluate": "^3.0.0", - "lodash._reinterpolate": "^3.0.0", - "lodash.template": "^3.0.0", - "minimist": "^1.1.0", - "multipipe": "^0.1.2", - "object-assign": "^3.0.0", - "replace-ext": "0.0.1", - "through2": "^2.0.0", - "vinyl": "^0.5.0" - }, - "dependencies": { - "ansi-styles": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-2.2.1.tgz", - "integrity": "sha1-tDLdM1i2NM914eRmQ2gkBTPB3b4=", - "dev": true - }, - "chalk": { - "version": "1.1.3", - "resolved": "http://registry.npmjs.org/chalk/-/chalk-1.1.3.tgz", - "integrity": "sha1-qBFcVeSnAv5NFQq9OHKCKn4J/Jg=", - "dev": true, - "requires": { - "ansi-styles": "^2.2.1", - "escape-string-regexp": "^1.0.2", - "has-ansi": "^2.0.0", - "strip-ansi": "^3.0.0", - "supports-color": "^2.0.0" - } - }, - "clone": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/clone/-/clone-1.0.4.tgz", - "integrity": "sha1-2jCcwmPfFZlMaIypAheco8fNfH4=", - "dev": true - }, - "clone-stats": { - "version": "0.0.1", - "resolved": "https://registry.npmjs.org/clone-stats/-/clone-stats-0.0.1.tgz", - "integrity": "sha1-uI+UqCzzi4eR1YBG6kAprYjKmdE=", - "dev": true - }, - "object-assign": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-3.0.0.tgz", - "integrity": "sha1-m+3VygiXlJvKR+f/QIBi1Un1h/I=", - "dev": true - }, - "replace-ext": { - "version": "0.0.1", - "resolved": "https://registry.npmjs.org/replace-ext/-/replace-ext-0.0.1.tgz", - "integrity": "sha1-KbvZIHinOfC8zitO5B6DeVNSKSQ=", - "dev": true - }, - "supports-color": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-2.0.0.tgz", - "integrity": "sha1-U10EXOa2Nj+kARcIRimZXp3zJMc=", - "dev": true - }, - "vinyl": { - "version": "0.5.3", - "resolved": "https://registry.npmjs.org/vinyl/-/vinyl-0.5.3.tgz", - "integrity": "sha1-sEVbOPxeDPMNQyUTLkYZcMIJHN4=", - "dev": true, - "requires": { - "clone": "^1.0.0", - "clone-stats": "^0.0.1", - "replace-ext": "0.0.1" - } - } - } - }, - "gulplog": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/gulplog/-/gulplog-1.0.0.tgz", - "integrity": "sha1-4oxNRdBey77YGDY86PnFkmIp/+U=", + "gulplog": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/gulplog/-/gulplog-1.0.0.tgz", + "integrity": "sha1-4oxNRdBey77YGDY86PnFkmIp/+U=", "dev": true, "requires": { "glogg": "^1.0.0" @@ -7036,12 +6091,12 @@ "dev": true }, "har-validator": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/har-validator/-/har-validator-5.1.0.tgz", - "integrity": 
"sha512-+qnmNjI4OfH2ipQ9VQOw23bBd/ibtfbVdK2fYbY4acTDqKTW/YDp9McimZdDbG8iV9fZizUqQMD5xvriB146TA==", + "version": "5.1.3", + "resolved": "https://registry.npmjs.org/har-validator/-/har-validator-5.1.3.tgz", + "integrity": "sha512-sNvOCzEQNr/qrvJgc3UG/kD4QtlHycrzwS+6mfTrrSq97BvaYcPZZI1ZSqGSPR73Cxn4LKTD4PttRwfU7jWq5g==", "dev": true, "requires": { - "ajv": "^5.3.0", + "ajv": "^6.5.5", "har-schema": "^2.0.0" } }, @@ -7068,15 +6123,6 @@ "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", "integrity": "sha1-tdRU3CGZriJWmfNGfloH87lVuv0=" }, - "has-gulplog": { - "version": "0.1.0", - "resolved": "https://registry.npmjs.org/has-gulplog/-/has-gulplog-0.1.0.tgz", - "integrity": "sha1-ZBTIKRNpfaUVkDl9r7EvIpZ4Ec4=", - "dev": true, - "requires": { - "sparkles": "^1.0.0" - } - }, "has-symbols": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.0.0.tgz", @@ -7098,14 +6144,6 @@ "get-value": "^2.0.6", "has-values": "^1.0.0", "isobject": "^3.0.0" - }, - "dependencies": { - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - } } }, "has-values": { @@ -7118,26 +6156,6 @@ "kind-of": "^4.0.0" }, "dependencies": { - "is-number": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-3.0.0.tgz", - "integrity": "sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, "kind-of": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-4.0.0.tgz", @@ -7160,9 +6178,9 @@ } }, "hash.js": { - "version": "1.1.5", - "resolved": "https://registry.npmjs.org/hash.js/-/hash.js-1.1.5.tgz", - "integrity": "sha512-eWI5HG9Np+eHV1KQhisXWwM+4EPPYe5dFX1UZZH7k/E3JzDEazVH+VGlZi6R94ZqImq+A3D1mCEtrFIfg/E7sA==", + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/hash.js/-/hash.js-1.1.7.tgz", + "integrity": "sha512-taOaskGt4z4SOANNseOviYDvjEJinIkRgmp7LbKP2YTTmVxWBl87s/uzK9r+44BclBSp2X7K1hqeNfz9JbBeXA==", "dev": true, "requires": { "inherits": "^2.0.3", @@ -7336,6 +6354,16 @@ "minimatch": "^3.0.4" } }, + "import-fresh": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-2.0.0.tgz", + "integrity": "sha1-2BNVwVYS04bGH53dOSLUMEgipUY=", + "dev": true, + "requires": { + "caller-path": "^2.0.0", + "resolve-from": "^3.0.0" + } + }, "import-local": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/import-local/-/import-local-1.0.0.tgz", @@ -7403,9 +6431,9 @@ } }, "inquirer": { - "version": "6.2.0", - "resolved": "https://registry.npmjs.org/inquirer/-/inquirer-6.2.0.tgz", - "integrity": "sha512-QIEQG4YyQ2UYZGDC4srMZ7BjHOmNk1lR2JQj5UknBapklm6WHA+VVH7N+sUdX3A7NeCfGF8o4X1S3Ao7nAcIeg==", + "version": "6.2.1", + "resolved": "https://registry.npmjs.org/inquirer/-/inquirer-6.2.1.tgz", + "integrity": "sha512-088kl3DRT2dLU5riVMKKr1DlImd6X7smDhpXUCkJDCKvTEJeRiXh0G132HG9u5a+6Ylw9plFRY7RuTnwohYSpg==", "dev": true, "requires": { "ansi-escapes": "^3.0.0", @@ -7419,7 +6447,7 @@ "run-async": "^2.2.0", "rxjs": "^6.1.0", "string-width": "^2.1.0", - "strip-ansi": "^4.0.0", + "strip-ansi": "^5.0.0", "through": "^2.3.6" }, "dependencies": { @@ -7452,23 +6480,42 @@ "requires": { 
"is-fullwidth-code-point": "^2.0.0", "strip-ansi": "^4.0.0" + }, + "dependencies": { + "strip-ansi": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-4.0.0.tgz", + "integrity": "sha1-qEeQIusaw2iocTibY1JixQXuNo8=", + "dev": true, + "requires": { + "ansi-regex": "^3.0.0" + } + } } }, "strip-ansi": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-4.0.0.tgz", - "integrity": "sha1-qEeQIusaw2iocTibY1JixQXuNo8=", + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-5.0.0.tgz", + "integrity": "sha512-Uu7gQyZI7J7gn5qLn1Np3G9vcYGTVqB+lFTytnDJv83dd8T22aGH451P3jueT2/QemInJDfxHB5Tde5OzgG1Ow==", "dev": true, "requires": { - "ansi-regex": "^3.0.0" + "ansi-regex": "^4.0.0" + }, + "dependencies": { + "ansi-regex": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-4.0.0.tgz", + "integrity": "sha512-iB5Dda8t/UqpPI/IjsejXu5jOGDrzn41wJyljwPH65VCIbk6+1BzFIMJGFwTNrYXT1CrD+B4l19U7awiQ8rk7w==", + "dev": true + } } } } }, "interpret": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/interpret/-/interpret-1.1.0.tgz", - "integrity": "sha1-ftGxQQxqDg94z5XTuEQMY/eLhhQ=", + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/interpret/-/interpret-1.2.0.tgz", + "integrity": "sha512-mT34yGKMNceBQUoVn7iCDKDntA7SC6gycMAWzGx1z/CMCTV7b2AAtXlo3nRyHZ1FelRkQbQjprHSYGwzLtkVbw==", "dev": true }, "invariant": { @@ -7509,6 +6556,17 @@ "dev": true, "requires": { "kind-of": "^3.0.2" + }, + "dependencies": { + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + } } }, "is-arrayish": { @@ -7534,7 +6592,7 @@ }, "is-builtin-module": { "version": "1.0.0", - "resolved": "http://registry.npmjs.org/is-builtin-module/-/is-builtin-module-1.0.0.tgz", + "resolved": "https://registry.npmjs.org/is-builtin-module/-/is-builtin-module-1.0.0.tgz", "integrity": "sha1-VAVy0096wxGfj3bDDLwbHgN6/74=", "dev": true, "requires": { @@ -7563,6 +6621,17 @@ "dev": true, "requires": { "kind-of": "^3.0.2" + }, + "dependencies": { + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + } } }, "is-date-object": { @@ -7618,9 +6687,9 @@ "dev": true }, "is-extglob": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-1.0.0.tgz", - "integrity": "sha1-rEaBd8SUNAWgkvyPKXYMb/xiBsA=", + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", + "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", "dev": true }, "is-finite": { @@ -7648,12 +6717,12 @@ "dev": true }, "is-glob": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-2.0.1.tgz", - "integrity": "sha1-0Jb5JqPe1WAPP9/ZEZjLCIjC2GM=", + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.0.tgz", + "integrity": "sha1-lSHHaEXMJhCoUgPd8ICpWML/q8A=", "dev": true, "requires": { - "is-extglob": "^1.0.0" + "is-extglob": "^2.1.1" } }, "is-negated-glob": { @@ -7663,29 +6732,31 @@ "dev": true }, "is-number": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-2.1.0.tgz", - "integrity": "sha1-Afy7s5NGOlSPL0ZszhbezknbkI8=", + "version": "3.0.0", + 
"resolved": "https://registry.npmjs.org/is-number/-/is-number-3.0.0.tgz", + "integrity": "sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=", "dev": true, "requires": { "kind-of": "^3.0.2" + }, + "dependencies": { + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + } } }, "is-obj": { "version": "1.0.1", - "resolved": "http://registry.npmjs.org/is-obj/-/is-obj-1.0.1.tgz", + "resolved": "https://registry.npmjs.org/is-obj/-/is-obj-1.0.1.tgz", "integrity": "sha1-PkcprB9f3gJc19g6iW2rn09n2w8=", "dev": true }, - "is-observable": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/is-observable/-/is-observable-1.1.0.tgz", - "integrity": "sha512-NqCa4Sa2d+u7BWc6CukaObG3Fh+CU9bvixbpcXYhy2VvYS7vVGIdAgnIS5Ks3A/cqk4rebLJ9s8zBstT2aKnIA==", - "dev": true, - "requires": { - "symbol-observable": "^1.1.0" - } - }, "is-path-cwd": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/is-path-cwd/-/is-path-cwd-1.0.0.tgz", @@ -7723,14 +6794,6 @@ "dev": true, "requires": { "isobject": "^3.0.1" - }, - "dependencies": { - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - } } }, "is-posix-bracket": { @@ -7760,12 +6823,6 @@ "has": "^1.0.1" } }, - "is-regexp": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-regexp/-/is-regexp-1.0.0.tgz", - "integrity": "sha1-/S2INUXEa6xaYz57mgnof6LLUGk=", - "dev": true - }, "is-relative": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/is-relative/-/is-relative-1.0.0.tgz", @@ -7851,13 +6908,10 @@ "dev": true }, "isobject": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-2.1.0.tgz", - "integrity": "sha1-8GVWEJaj8dou9GJy+BXIQNh+DIk=", - "dev": true, - "requires": { - "isarray": "1.0.0" - } + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", + "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", + "dev": true }, "isstream": { "version": "0.1.2", @@ -7983,12 +7037,14 @@ } }, "ix": { - "version": "2.3.5", - "resolved": "https://registry.npmjs.org/ix/-/ix-2.3.5.tgz", - "integrity": "sha512-mdW2LtQiy+gPtggKa393EdSaI46RARsAa5zjlLgNKMlE57vC6dc6g6nehROI1Gj/HhsTvpb3WALSwg0EWhhz0Q==", + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/ix/-/ix-2.5.1.tgz", + "integrity": "sha512-YPX759NbhmIynoCYsxcpKBCQDFkeVup4xGaAylnIRaM+md7qrLyoW7kow0iqx4cJr8PUG85/cfwfjylqehg8bQ==", "dev": true, "requires": { - "tslib": "^1.8.0" + "@types/node": "^10.12.18", + "is-stream": "1.1.0", + "tslib": "^1.9.3" } }, "jest": { @@ -8007,6 +7063,38 @@ "integrity": "sha1-7QMXwyIGT3lGbAKWa922Bas32Zg=", "dev": true }, + "arr-diff": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-2.0.0.tgz", + "integrity": "sha1-jzuCf5Vai9ZpaX5KQlasPOrjVs8=", + "dev": true, + "requires": { + "arr-flatten": "^1.0.1" + } + }, + "array-unique": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.2.1.tgz", + "integrity": "sha1-odl8yvy8JiXMcPrc6zalDFiwGlM=", + "dev": true + }, + "braces": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/braces/-/braces-1.8.5.tgz", + "integrity": "sha1-uneWLhLf+WnWt2cR6RS3N4V79qc=", + "dev": true, + "requires": { + "expand-range": "^1.8.1", + "preserve": "^0.2.0", + "repeat-element": "^1.1.2" + } + 
}, + "camelcase": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-4.1.0.tgz", + "integrity": "sha1-1UVjW+HjPFQmScaRc+Xeas+uNN0=", + "dev": true + }, "cliui": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/cliui/-/cliui-4.1.0.tgz", @@ -8018,12 +7106,54 @@ "wrap-ansi": "^2.0.0" } }, + "expand-brackets": { + "version": "0.1.5", + "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-0.1.5.tgz", + "integrity": "sha1-3wcoTjQqgHzXM6xa9yQR5YHRF3s=", + "dev": true, + "requires": { + "is-posix-bracket": "^0.1.0" + } + }, + "extglob": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/extglob/-/extglob-0.3.2.tgz", + "integrity": "sha1-Lhj/PS9JqydlzskCPwEdqo2DSaE=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + }, + "find-up": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-2.1.0.tgz", + "integrity": "sha1-RdG35QbHF93UgndaK3eSCjwMV6c=", + "dev": true, + "requires": { + "locate-path": "^2.0.0" + } + }, + "is-extglob": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-1.0.0.tgz", + "integrity": "sha1-rEaBd8SUNAWgkvyPKXYMb/xiBsA=", + "dev": true + }, "is-fullwidth-code-point": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-2.0.0.tgz", "integrity": "sha1-o7MKXE8ZkYMWeqq5O+764937ZU8=", "dev": true }, + "is-glob": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-2.0.1.tgz", + "integrity": "sha1-0Jb5JqPe1WAPP9/ZEZjLCIjC2GM=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + }, "jest-cli": { "version": "23.6.0", "resolved": "https://registry.npmjs.org/jest-cli/-/jest-cli-23.6.0.tgz", @@ -8068,6 +7198,36 @@ "yargs": "^11.0.0" } }, + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + }, + "micromatch": { + "version": "2.3.11", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-2.3.11.tgz", + "integrity": "sha1-hmd8l9FyCzY0MdBNDRUpO9OMFWU=", + "dev": true, + "requires": { + "arr-diff": "^2.0.0", + "array-unique": "^0.2.1", + "braces": "^1.8.2", + "expand-brackets": "^0.1.4", + "extglob": "^0.3.1", + "filename-regex": "^2.0.0", + "is-extglob": "^1.0.0", + "is-glob": "^2.0.1", + "kind-of": "^3.0.2", + "normalize-path": "^2.0.1", + "object.omit": "^2.0.0", + "parse-glob": "^3.0.4", + "regex-cache": "^0.4.2" + } + }, "os-locale": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/os-locale/-/os-locale-2.1.0.tgz", @@ -8098,9 +7258,15 @@ "ansi-regex": "^3.0.0" } }, + "which-module": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/which-module/-/which-module-2.0.0.tgz", + "integrity": "sha1-2e8H3Od7mQK4o6j6SzHD4/fm6Ho=", + "dev": true + }, "yargs": { "version": "11.1.0", - "resolved": "http://registry.npmjs.org/yargs/-/yargs-11.1.0.tgz", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-11.1.0.tgz", "integrity": "sha512-NwW69J42EsCSanF8kyn5upxvjp5ds+t3+udGBeTbFnERA+lF541DDpMawzo4z6W/QrzNM18D+BPMiOBibnFV5A==", "dev": true, "requires": { @@ -8117,6 +7283,15 @@ "y18n": "^3.2.1", "yargs-parser": "^9.0.2" } + }, + "yargs-parser": { + "version": "9.0.2", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-9.0.2.tgz", + "integrity": "sha1-nM9qQ0YP5O1Aqbto9I1DuKaMwHc=", + "dev": true, + "requires": { + 
"camelcase": "^4.1.0" + } } } }, @@ -8149,37 +7324,128 @@ "jest-validate": "^23.6.0", "micromatch": "^2.3.11", "pretty-format": "^23.6.0" - } - }, - "jest-diff": { - "version": "23.6.0", - "resolved": "https://registry.npmjs.org/jest-diff/-/jest-diff-23.6.0.tgz", - "integrity": "sha512-Gz9l5Ov+X3aL5L37IT+8hoCUsof1CVYBb2QEkOupK64XyRR3h+uRpYIm97K7sY8diFxowR8pIGEdyfMKTixo3g==", - "dev": true, - "requires": { - "chalk": "^2.0.1", - "diff": "^3.2.0", - "jest-get-type": "^22.1.0", - "pretty-format": "^23.6.0" - } - }, - "jest-docblock": { - "version": "23.2.0", - "resolved": "https://registry.npmjs.org/jest-docblock/-/jest-docblock-23.2.0.tgz", - "integrity": "sha1-8IXh8YVI2Z/dabICB+b9VdkTg6c=", - "dev": true, - "requires": { - "detect-newline": "^2.1.0" - } - }, - "jest-each": { - "version": "23.6.0", - "resolved": "https://registry.npmjs.org/jest-each/-/jest-each-23.6.0.tgz", - "integrity": "sha512-x7V6M/WGJo6/kLoissORuvLIeAoyo2YqLOoCDkohgJ4XOXSqOtyvr8FbInlAWS77ojBsZrafbozWoKVRdtxFCg==", - "dev": true, - "requires": { - "chalk": "^2.0.1", - "pretty-format": "^23.6.0" + }, + "dependencies": { + "arr-diff": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-2.0.0.tgz", + "integrity": "sha1-jzuCf5Vai9ZpaX5KQlasPOrjVs8=", + "dev": true, + "requires": { + "arr-flatten": "^1.0.1" + } + }, + "array-unique": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.2.1.tgz", + "integrity": "sha1-odl8yvy8JiXMcPrc6zalDFiwGlM=", + "dev": true + }, + "braces": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/braces/-/braces-1.8.5.tgz", + "integrity": "sha1-uneWLhLf+WnWt2cR6RS3N4V79qc=", + "dev": true, + "requires": { + "expand-range": "^1.8.1", + "preserve": "^0.2.0", + "repeat-element": "^1.1.2" + } + }, + "expand-brackets": { + "version": "0.1.5", + "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-0.1.5.tgz", + "integrity": "sha1-3wcoTjQqgHzXM6xa9yQR5YHRF3s=", + "dev": true, + "requires": { + "is-posix-bracket": "^0.1.0" + } + }, + "extglob": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/extglob/-/extglob-0.3.2.tgz", + "integrity": "sha1-Lhj/PS9JqydlzskCPwEdqo2DSaE=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + }, + "is-extglob": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-1.0.0.tgz", + "integrity": "sha1-rEaBd8SUNAWgkvyPKXYMb/xiBsA=", + "dev": true + }, + "is-glob": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-2.0.1.tgz", + "integrity": "sha1-0Jb5JqPe1WAPP9/ZEZjLCIjC2GM=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + }, + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + }, + "micromatch": { + "version": "2.3.11", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-2.3.11.tgz", + "integrity": "sha1-hmd8l9FyCzY0MdBNDRUpO9OMFWU=", + "dev": true, + "requires": { + "arr-diff": "^2.0.0", + "array-unique": "^0.2.1", + "braces": "^1.8.2", + "expand-brackets": "^0.1.4", + "extglob": "^0.3.1", + "filename-regex": "^2.0.0", + "is-extglob": "^1.0.0", + "is-glob": "^2.0.1", + "kind-of": "^3.0.2", + "normalize-path": "^2.0.1", + "object.omit": "^2.0.0", + "parse-glob": "^3.0.4", + "regex-cache": "^0.4.2" + } + } + } + }, + "jest-diff": { + "version": "23.6.0", + "resolved": 
"https://registry.npmjs.org/jest-diff/-/jest-diff-23.6.0.tgz", + "integrity": "sha512-Gz9l5Ov+X3aL5L37IT+8hoCUsof1CVYBb2QEkOupK64XyRR3h+uRpYIm97K7sY8diFxowR8pIGEdyfMKTixo3g==", + "dev": true, + "requires": { + "chalk": "^2.0.1", + "diff": "^3.2.0", + "jest-get-type": "^22.1.0", + "pretty-format": "^23.6.0" + } + }, + "jest-docblock": { + "version": "23.2.0", + "resolved": "https://registry.npmjs.org/jest-docblock/-/jest-docblock-23.2.0.tgz", + "integrity": "sha1-8IXh8YVI2Z/dabICB+b9VdkTg6c=", + "dev": true, + "requires": { + "detect-newline": "^2.1.0" + } + }, + "jest-each": { + "version": "23.6.0", + "resolved": "https://registry.npmjs.org/jest-each/-/jest-each-23.6.0.tgz", + "integrity": "sha512-x7V6M/WGJo6/kLoissORuvLIeAoyo2YqLOoCDkohgJ4XOXSqOtyvr8FbInlAWS77ojBsZrafbozWoKVRdtxFCg==", + "dev": true, + "requires": { + "chalk": "^2.0.1", + "pretty-format": "^23.6.0" } }, "jest-environment-jsdom": { @@ -8229,6 +7495,97 @@ "jest-worker": "^23.2.0", "micromatch": "^2.3.11", "sane": "^2.0.0" + }, + "dependencies": { + "arr-diff": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-2.0.0.tgz", + "integrity": "sha1-jzuCf5Vai9ZpaX5KQlasPOrjVs8=", + "dev": true, + "requires": { + "arr-flatten": "^1.0.1" + } + }, + "array-unique": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.2.1.tgz", + "integrity": "sha1-odl8yvy8JiXMcPrc6zalDFiwGlM=", + "dev": true + }, + "braces": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/braces/-/braces-1.8.5.tgz", + "integrity": "sha1-uneWLhLf+WnWt2cR6RS3N4V79qc=", + "dev": true, + "requires": { + "expand-range": "^1.8.1", + "preserve": "^0.2.0", + "repeat-element": "^1.1.2" + } + }, + "expand-brackets": { + "version": "0.1.5", + "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-0.1.5.tgz", + "integrity": "sha1-3wcoTjQqgHzXM6xa9yQR5YHRF3s=", + "dev": true, + "requires": { + "is-posix-bracket": "^0.1.0" + } + }, + "extglob": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/extglob/-/extglob-0.3.2.tgz", + "integrity": "sha1-Lhj/PS9JqydlzskCPwEdqo2DSaE=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + }, + "is-extglob": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-1.0.0.tgz", + "integrity": "sha1-rEaBd8SUNAWgkvyPKXYMb/xiBsA=", + "dev": true + }, + "is-glob": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-2.0.1.tgz", + "integrity": "sha1-0Jb5JqPe1WAPP9/ZEZjLCIjC2GM=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + }, + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + }, + "micromatch": { + "version": "2.3.11", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-2.3.11.tgz", + "integrity": "sha1-hmd8l9FyCzY0MdBNDRUpO9OMFWU=", + "dev": true, + "requires": { + "arr-diff": "^2.0.0", + "array-unique": "^0.2.1", + "braces": "^1.8.2", + "expand-brackets": "^0.1.4", + "extglob": "^0.3.1", + "filename-regex": "^2.0.0", + "is-extglob": "^1.0.0", + "is-glob": "^2.0.1", + "kind-of": "^3.0.2", + "normalize-path": "^2.0.1", + "object.omit": "^2.0.0", + "parse-glob": "^3.0.4", + "regex-cache": "^0.4.2" + } + } } }, "jest-jasmine2": { @@ -8282,6 +7639,97 @@ "micromatch": "^2.3.11", "slash": "^1.0.0", "stack-utils": "^1.0.1" + }, + "dependencies": { + 
"arr-diff": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-2.0.0.tgz", + "integrity": "sha1-jzuCf5Vai9ZpaX5KQlasPOrjVs8=", + "dev": true, + "requires": { + "arr-flatten": "^1.0.1" + } + }, + "array-unique": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.2.1.tgz", + "integrity": "sha1-odl8yvy8JiXMcPrc6zalDFiwGlM=", + "dev": true + }, + "braces": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/braces/-/braces-1.8.5.tgz", + "integrity": "sha1-uneWLhLf+WnWt2cR6RS3N4V79qc=", + "dev": true, + "requires": { + "expand-range": "^1.8.1", + "preserve": "^0.2.0", + "repeat-element": "^1.1.2" + } + }, + "expand-brackets": { + "version": "0.1.5", + "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-0.1.5.tgz", + "integrity": "sha1-3wcoTjQqgHzXM6xa9yQR5YHRF3s=", + "dev": true, + "requires": { + "is-posix-bracket": "^0.1.0" + } + }, + "extglob": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/extglob/-/extglob-0.3.2.tgz", + "integrity": "sha1-Lhj/PS9JqydlzskCPwEdqo2DSaE=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + }, + "is-extglob": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-1.0.0.tgz", + "integrity": "sha1-rEaBd8SUNAWgkvyPKXYMb/xiBsA=", + "dev": true + }, + "is-glob": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-2.0.1.tgz", + "integrity": "sha1-0Jb5JqPe1WAPP9/ZEZjLCIjC2GM=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + }, + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + }, + "micromatch": { + "version": "2.3.11", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-2.3.11.tgz", + "integrity": "sha1-hmd8l9FyCzY0MdBNDRUpO9OMFWU=", + "dev": true, + "requires": { + "arr-diff": "^2.0.0", + "array-unique": "^0.2.1", + "braces": "^1.8.2", + "expand-brackets": "^0.1.4", + "extglob": "^0.3.1", + "filename-regex": "^2.0.0", + "is-extglob": "^1.0.0", + "is-glob": "^2.0.1", + "kind-of": "^3.0.2", + "normalize-path": "^2.0.1", + "object.omit": "^2.0.0", + "parse-glob": "^3.0.4", + "regex-cache": "^0.4.2" + } + } } }, "jest-mock": { @@ -8345,9 +7793,9 @@ "dev": true }, "source-map-support": { - "version": "0.5.9", - "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.9.tgz", - "integrity": "sha512-gR6Rw4MvUlYy83vP0vxoVNzM6t8MUXqNuRsuBmBHQDu1Fh6X015FrLdgoDKcNdkwGubozq0P4N0Q37UyFVr1EA==", + "version": "0.5.10", + "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.10.tgz", + "integrity": "sha512-YfQ3tQFTK/yzlGJuX8pTwa4tifQj4QS2Mj7UegOu8jAz59MqIiMGPXxQhVQiIMNzayuUSF/jEuVnfFF5JqybmQ==", "dev": true, "requires": { "buffer-from": "^1.0.0", @@ -8391,6 +7839,38 @@ "integrity": "sha1-7QMXwyIGT3lGbAKWa922Bas32Zg=", "dev": true }, + "arr-diff": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-2.0.0.tgz", + "integrity": "sha1-jzuCf5Vai9ZpaX5KQlasPOrjVs8=", + "dev": true, + "requires": { + "arr-flatten": "^1.0.1" + } + }, + "array-unique": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.2.1.tgz", + "integrity": "sha1-odl8yvy8JiXMcPrc6zalDFiwGlM=", + "dev": true + }, + "braces": { + "version": "1.8.5", + "resolved": 
"https://registry.npmjs.org/braces/-/braces-1.8.5.tgz", + "integrity": "sha1-uneWLhLf+WnWt2cR6RS3N4V79qc=", + "dev": true, + "requires": { + "expand-range": "^1.8.1", + "preserve": "^0.2.0", + "repeat-element": "^1.1.2" + } + }, + "camelcase": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-4.1.0.tgz", + "integrity": "sha1-1UVjW+HjPFQmScaRc+Xeas+uNN0=", + "dev": true + }, "cliui": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/cliui/-/cliui-4.1.0.tgz", @@ -8402,12 +7882,84 @@ "wrap-ansi": "^2.0.0" } }, + "expand-brackets": { + "version": "0.1.5", + "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-0.1.5.tgz", + "integrity": "sha1-3wcoTjQqgHzXM6xa9yQR5YHRF3s=", + "dev": true, + "requires": { + "is-posix-bracket": "^0.1.0" + } + }, + "extglob": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/extglob/-/extglob-0.3.2.tgz", + "integrity": "sha1-Lhj/PS9JqydlzskCPwEdqo2DSaE=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + }, + "find-up": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-2.1.0.tgz", + "integrity": "sha1-RdG35QbHF93UgndaK3eSCjwMV6c=", + "dev": true, + "requires": { + "locate-path": "^2.0.0" + } + }, + "is-extglob": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-1.0.0.tgz", + "integrity": "sha1-rEaBd8SUNAWgkvyPKXYMb/xiBsA=", + "dev": true + }, "is-fullwidth-code-point": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-2.0.0.tgz", "integrity": "sha1-o7MKXE8ZkYMWeqq5O+764937ZU8=", "dev": true }, + "is-glob": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-2.0.1.tgz", + "integrity": "sha1-0Jb5JqPe1WAPP9/ZEZjLCIjC2GM=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + }, + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + }, + "micromatch": { + "version": "2.3.11", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-2.3.11.tgz", + "integrity": "sha1-hmd8l9FyCzY0MdBNDRUpO9OMFWU=", + "dev": true, + "requires": { + "arr-diff": "^2.0.0", + "array-unique": "^0.2.1", + "braces": "^1.8.2", + "expand-brackets": "^0.1.4", + "extglob": "^0.3.1", + "filename-regex": "^2.0.0", + "is-extglob": "^1.0.0", + "is-glob": "^2.0.1", + "kind-of": "^3.0.2", + "normalize-path": "^2.0.1", + "object.omit": "^2.0.0", + "parse-glob": "^3.0.4", + "regex-cache": "^0.4.2" + } + }, "os-locale": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/os-locale/-/os-locale-2.1.0.tgz", @@ -8444,9 +7996,15 @@ "integrity": "sha1-IzTBjpx1n3vdVv3vfprj1YjmjtM=", "dev": true }, + "which-module": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/which-module/-/which-module-2.0.0.tgz", + "integrity": "sha1-2e8H3Od7mQK4o6j6SzHD4/fm6Ho=", + "dev": true + }, "yargs": { "version": "11.1.0", - "resolved": "http://registry.npmjs.org/yargs/-/yargs-11.1.0.tgz", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-11.1.0.tgz", "integrity": "sha512-NwW69J42EsCSanF8kyn5upxvjp5ds+t3+udGBeTbFnERA+lF541DDpMawzo4z6W/QrzNM18D+BPMiOBibnFV5A==", "dev": true, "requires": { @@ -8463,6 +8021,15 @@ "y18n": "^3.2.1", "yargs-parser": "^9.0.2" } + }, + "yargs-parser": { + "version": "9.0.2", + "resolved": 
"https://registry.npmjs.org/yargs-parser/-/yargs-parser-9.0.2.tgz", + "integrity": "sha1-nM9qQ0YP5O1Aqbto9I1DuKaMwHc=", + "dev": true, + "requires": { + "camelcase": "^4.1.0" + } } } }, @@ -8472,6 +8039,16 @@ "integrity": "sha1-o3dq6zEekP6D+rnlM+hRAr0WQWU=", "dev": true }, + "jest-silent-reporter": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/jest-silent-reporter/-/jest-silent-reporter-0.1.1.tgz", + "integrity": "sha512-nrRzOV4151hG354tnVWfyZbFGJdylpadRWYWWPSD+WeOz2hQOjUGxvIFODnaY9cKQ7JWCtG+5LgSss22ccRhBg==", + "dev": true, + "requires": { + "chalk": "^2.3.1", + "jest-util": "^23.0.0" + } + }, "jest-snapshot": { "version": "23.6.0", "resolved": "https://registry.npmjs.org/jest-snapshot/-/jest-snapshot-23.6.0.tgz", @@ -8553,9 +8130,9 @@ "dev": true }, "js-yaml": { - "version": "3.12.0", - "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.12.0.tgz", - "integrity": "sha512-PIt2cnwmPfL4hKNwqeiuz4bKfnzHTBv6HyVgjahA6mPLwPDzjDWrplJBMjHUFxku/N3FlmrbyPclad+I+4mJ3A==", + "version": "3.12.1", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.12.1.tgz", + "integrity": "sha512-um46hB9wNOKlwkHgiuyEVAybXBjwFUV0Z/RaHJblRd9DXltue9FTYvzCr9ErQrK9Adz5MU4gHWVaNUfdmrC8qA==", "dev": true, "requires": { "argparse": "^1.0.7", @@ -8632,19 +8209,16 @@ "dev": true }, "json-schema-traverse": { - "version": "0.3.1", - "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.3.1.tgz", - "integrity": "sha1-NJptRMU6Ud6JtAgFxdXlm0F9M0A=", + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", "dev": true }, - "json-stable-stringify": { + "json-stable-stringify-without-jsonify": { "version": "1.0.1", - "resolved": "https://registry.npmjs.org/json-stable-stringify/-/json-stable-stringify-1.0.1.tgz", - "integrity": "sha1-mnWdOcXy/1A/1TAGRu1EX4jE+a8=", - "dev": true, - "requires": { - "jsonify": "~0.0.0" - } + "resolved": "https://registry.npmjs.org/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz", + "integrity": "sha1-nbe1lJatPzz+8wp1FC0tkwrXJlE=", + "dev": true }, "json-stringify-safe": { "version": "5.0.1", @@ -8654,7 +8228,7 @@ }, "json5": { "version": "0.5.1", - "resolved": "http://registry.npmjs.org/json5/-/json5-0.5.1.tgz", + "resolved": "https://registry.npmjs.org/json5/-/json5-0.5.1.tgz", "integrity": "sha1-Hq3nrMASA0rYTiOWdn6tn6VJWCE=", "dev": true }, @@ -8698,13 +8272,10 @@ "dev": true }, "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", + "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", + "dev": true }, "klaw": { "version": "1.3.1", @@ -8771,28 +8342,28 @@ "dev": true }, "lerna": { - "version": "3.4.3", - "resolved": "https://registry.npmjs.org/lerna/-/lerna-3.4.3.tgz", - "integrity": "sha512-tWq1LvpHqkyB+FaJCmkEweivr88yShDMmauofPVdh0M5gU1cVucszYnIgWafulKYu2LMQ3IfUMUU5Pp3+MvADQ==", - "dev": true, - "requires": { - "@lerna/add": "^3.4.1", - "@lerna/bootstrap": "^3.4.1", - "@lerna/changed": "^3.4.1", - "@lerna/clean": "^3.3.2", - "@lerna/cli": "^3.2.0", - "@lerna/create": "^3.4.1", - "@lerna/diff": "^3.3.0", - 
"@lerna/exec": "^3.3.2", - "@lerna/import": "^3.3.1", - "@lerna/init": "^3.3.0", - "@lerna/link": "^3.3.0", - "@lerna/list": "^3.3.2", - "@lerna/publish": "^3.4.3", - "@lerna/run": "^3.3.2", - "@lerna/version": "^3.4.1", + "version": "3.10.7", + "resolved": "https://registry.npmjs.org/lerna/-/lerna-3.10.7.tgz", + "integrity": "sha512-ha/dehl/L3Nw0pbdir5z6Hrv2oYBg5ym2fTcuk8HCLe7Zdb/ylIHdrgW8CU9eTVZkwr4et8RdVtxFA/+xa65/Q==", + "dev": true, + "requires": { + "@lerna/add": "3.10.6", + "@lerna/bootstrap": "3.10.6", + "@lerna/changed": "3.10.6", + "@lerna/clean": "3.10.6", + "@lerna/cli": "3.10.7", + "@lerna/create": "3.10.6", + "@lerna/diff": "3.10.6", + "@lerna/exec": "3.10.6", + "@lerna/import": "3.10.6", + "@lerna/init": "3.10.6", + "@lerna/link": "3.10.6", + "@lerna/list": "3.10.6", + "@lerna/publish": "3.10.7", + "@lerna/run": "3.10.6", + "@lerna/version": "3.10.6", "import-local": "^1.0.0", - "npmlog": "^4.1.2" + "libnpm": "^2.0.1" } }, "leven": { @@ -8811,10 +8382,38 @@ "type-check": "~0.3.2" } }, + "libnpm": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/libnpm/-/libnpm-2.0.1.tgz", + "integrity": "sha512-qTKoxyJvpBxHZQB6k0AhSLajyXq9ZE/lUsZzuHAplr2Bpv9G+k4YuYlExYdUCeVRRGqcJt8hvkPh4tBwKoV98w==", + "dev": true, + "requires": { + "bin-links": "^1.1.2", + "bluebird": "^3.5.3", + "find-npm-prefix": "^1.0.2", + "libnpmaccess": "^3.0.1", + "libnpmconfig": "^1.2.1", + "libnpmhook": "^5.0.2", + "libnpmorg": "^1.0.0", + "libnpmpublish": "^1.1.0", + "libnpmsearch": "^2.0.0", + "libnpmteam": "^1.0.1", + "lock-verify": "^2.0.2", + "npm-lifecycle": "^2.1.0", + "npm-logical-tree": "^1.2.1", + "npm-package-arg": "^6.1.0", + "npm-profile": "^4.0.1", + "npm-registry-fetch": "^3.8.0", + "npmlog": "^4.1.2", + "pacote": "^9.2.3", + "read-package-json": "^2.0.13", + "stringify-package": "^1.0.0" + } + }, "libnpmaccess": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/libnpmaccess/-/libnpmaccess-3.0.0.tgz", - "integrity": "sha512-SiE4AZAzMpD7pmmXHfgD7rof8QIQGoKaeyAS8exgx2CKA6tzRTbRljq1xM4Tgj8/tIg+KBJPJWkR0ifqKT3irQ==", + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/libnpmaccess/-/libnpmaccess-3.0.1.tgz", + "integrity": "sha512-RlZ7PNarCBt+XbnP7R6PoVgOq9t+kou5rvhaInoNibhPO7eMlRfS0B8yjatgn2yaHIwWNyoJDolC/6Lc5L/IQA==", "dev": true, "requires": { "aproba": "^2.0.0", @@ -8837,564 +8436,293 @@ "requires": { "pump": "^3.0.0" } + }, + "pump": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", + "dev": true, + "requires": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } } } }, - "liftoff": { - "version": "2.5.0", - "resolved": "https://registry.npmjs.org/liftoff/-/liftoff-2.5.0.tgz", - "integrity": "sha1-IAkpG7Mc6oYbvxCnwVooyvdcMew=", - "dev": true, - "requires": { - "extend": "^3.0.0", - "findup-sync": "^2.0.0", - "fined": "^1.0.1", - "flagged-respawn": "^1.0.0", - "is-plain-object": "^2.0.4", - "object.map": "^1.0.0", - "rechoir": "^0.6.2", - "resolve": "^1.1.7" - } - }, - "lint-staged": { - "version": "7.3.0", - "resolved": "https://registry.npmjs.org/lint-staged/-/lint-staged-7.3.0.tgz", - "integrity": "sha512-AXk40M9DAiPi7f4tdJggwuKIViUplYtVj1os1MVEteW7qOkU50EOehayCfO9TsoGK24o/EsWb41yrEgfJDDjCw==", + "libnpmconfig": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/libnpmconfig/-/libnpmconfig-1.2.1.tgz", + "integrity": 
"sha512-9esX8rTQAHqarx6qeZqmGQKBNZR5OIbl/Ayr0qQDy3oXja2iFVQQI81R6GZ2a02bSNZ9p3YOGX1O6HHCb1X7kA==", "dev": true, "requires": { - "chalk": "^2.3.1", - "commander": "^2.14.1", - "cosmiconfig": "^5.0.2", - "debug": "^3.1.0", - "dedent": "^0.7.0", - "execa": "^0.9.0", - "find-parent-dir": "^0.3.0", - "is-glob": "^4.0.0", - "is-windows": "^1.0.2", - "jest-validate": "^23.5.0", - "listr": "^0.14.1", - "lodash": "^4.17.5", - "log-symbols": "^2.2.0", - "micromatch": "^3.1.8", - "npm-which": "^3.0.1", - "p-map": "^1.1.1", - "path-is-inside": "^1.0.2", - "pify": "^3.0.0", - "please-upgrade-node": "^3.0.2", - "staged-git-files": "1.1.1", - "string-argv": "^0.0.2", - "stringify-object": "^3.2.2" + "figgy-pudding": "^3.5.1", + "find-up": "^3.0.0", + "ini": "^1.3.5" }, "dependencies": { - "arr-diff": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz", - "integrity": "sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=", - "dev": true - }, - "array-unique": { - "version": "0.3.2", - "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.3.2.tgz", - "integrity": "sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg=", - "dev": true - }, - "braces": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-2.3.2.tgz", - "integrity": "sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w==", + "find-up": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-3.0.0.tgz", + "integrity": "sha512-1yD6RmLI1XBfxugvORwlck6f75tYL+iR0jqwsOrOxMZyGYqUuDhJ0l4AXdO1iX/FTs9cBAMEk1gWSEx1kSbylg==", "dev": true, "requires": { - "arr-flatten": "^1.1.0", - "array-unique": "^0.3.2", - "extend-shallow": "^2.0.1", - "fill-range": "^4.0.0", - "isobject": "^3.0.1", - "repeat-element": "^1.1.2", - "snapdragon": "^0.8.1", - "snapdragon-node": "^2.0.1", - "split-string": "^3.0.2", - "to-regex": "^3.0.1" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } + "locate-path": "^3.0.0" } }, - "debug": { - "version": "3.2.6", - "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.6.tgz", - "integrity": "sha512-mel+jf7nrtEl5Pn1Qx46zARXKDpBbvzezse7p7LqINmdoIk8PYP5SySaxEmYv6TZ0JyEKA1hsCId6DIhgITtWQ==", + "locate-path": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-3.0.0.tgz", + "integrity": "sha512-7AO748wWnIhNqAuaty2ZWHkQHRSNfPVIsPIfwEOWO22AmaoVrWavlOcMR5nzTLNYvp36X220/maaRsrec1G65A==", "dev": true, "requires": { - "ms": "^2.1.1" + "p-locate": "^3.0.0", + "path-exists": "^3.0.0" } }, - "execa": { - "version": "0.9.0", - "resolved": "https://registry.npmjs.org/execa/-/execa-0.9.0.tgz", - "integrity": "sha512-BbUMBiX4hqiHZUA5+JujIjNb6TyAlp2D5KLheMjMluwOuzcnylDL4AxZYLLn1n2AGB49eSWwyKvvEQoRpnAtmA==", + "p-limit": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.1.0.tgz", + "integrity": "sha512-NhURkNcrVB+8hNfLuysU8enY5xn2KXphsHBaC2YmRNTZRc7RWusw6apSpdEj3jo4CMb6W9nrF6tTnsJsJeyu6g==", "dev": true, "requires": { - "cross-spawn": "^5.0.1", - "get-stream": "^3.0.0", - "is-stream": "^1.1.0", - "npm-run-path": "^2.0.0", - "p-finally": "^1.0.0", - "signal-exit": "^3.0.0", - "strip-eof": "^1.0.0" + "p-try": "^2.0.0" } }, - "expand-brackets": { - "version": "2.1.4", - "resolved": 
"https://registry.npmjs.org/expand-brackets/-/expand-brackets-2.1.4.tgz", - "integrity": "sha1-t3c14xXOMPa27/D4OwQVGiJEliI=", + "p-locate": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-3.0.0.tgz", + "integrity": "sha512-x+12w/To+4GFfgJhBEpiDcLozRJGegY+Ei7/z0tSLkMmxGZNybVMSfWj9aJn8Z5Fc7dBUNJOOVgPv2H7IwulSQ==", "dev": true, "requires": { - "debug": "^2.3.3", - "define-property": "^0.2.5", - "extend-shallow": "^2.0.1", - "posix-character-classes": "^0.1.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "debug": { - "version": "2.6.9", - "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", - "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", - "dev": true, - "requires": { - "ms": "2.0.0" - } - }, - "define-property": { - "version": "0.2.5", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-0.2.5.tgz", - "integrity": "sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=", - "dev": true, - "requires": { - "is-descriptor": "^0.1.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - }, - "is-accessor-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-0.1.6.tgz", - "integrity": "sha1-qeEss66Nh2cn7u84Q/igiXtcmNY=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-data-descriptor": { - "version": "0.1.4", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz", - "integrity": "sha1-C17mSDiOLIYCgueT8YVv7D8wG1Y=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-0.1.6.tgz", - "integrity": "sha512-avDYr0SB3DwO9zsMov0gKCESFYqCnE4hq/4z3TdUlukEy5t9C0YRq7HLrsN52NAcqXKaepeCD0n+B0arnVG3Hg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^0.1.6", - "is-data-descriptor": "^0.1.4", - "kind-of": "^5.0.0" - } - }, - "kind-of": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-5.1.0.tgz", - "integrity": "sha512-NGEErnH6F2vUuXDh+OlbcKW7/wOcfdRHaZ7VWtqCztfHri/++YKmP51OdWeGPuqCOba6kk2OTe5d02VmTB80Pw==", - "dev": true - }, - "ms": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", - "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=", - "dev": true - } + "p-limit": "^2.0.0" } }, - "extglob": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/extglob/-/extglob-2.0.4.tgz", - "integrity": "sha512-Nmb6QXkELsuBr24CJSkilo6UHHgbekK5UiZgfE6UHD3Eb27YC6oD+bhcT+tJ6cl8dmsgdQxnWlcry8ksBIBLpw==", - "dev": true, - "requires": { - "array-unique": "^0.3.2", - "define-property": "^1.0.0", - "expand-brackets": "^2.1.4", - 
"extend-shallow": "^2.0.1", - "fragment-cache": "^0.2.1", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-1.0.0.tgz", - "integrity": "sha1-dp66rz9KY6rTr56NMEybvnm/sOY=", - "dev": true, - "requires": { - "is-descriptor": "^1.0.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } + "p-try": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/p-try/-/p-try-2.0.0.tgz", + "integrity": "sha512-hMp0onDKIajHfIkdRk3P4CdCmErkYAxxDtP3Wx/4nZ3aGlau2VKh3mZpcuFkH27WQkL/3WBCPOktzA9ZOAnMQQ==", + "dev": true }, - "fill-range": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-4.0.0.tgz", - "integrity": "sha1-1USBHUKPmOsGpj3EAtJAPDKMOPc=", + "path-exists": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-3.0.0.tgz", + "integrity": "sha1-zg6+ql94yxiSXqfYENe1mwEP1RU=", + "dev": true + } + } + }, + "libnpmhook": { + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/libnpmhook/-/libnpmhook-5.0.2.tgz", + "integrity": "sha512-vLenmdFWhRfnnZiNFPNMog6CK7Ujofy2TWiM2CrpZUjBRIhHkJeDaAbJdYCT6W4lcHtyrJR8yXW8KFyq6UAp1g==", + "dev": true, + "requires": { + "aproba": "^2.0.0", + "figgy-pudding": "^3.4.1", + "get-stream": "^4.0.0", + "npm-registry-fetch": "^3.8.0" + }, + "dependencies": { + "aproba": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/aproba/-/aproba-2.0.0.tgz", + "integrity": "sha512-lYe4Gx7QT+MKGbDsA+Z+he/Wtef0BiwDOlK/XkBrdfsh9J/jPPXbX0tE9x9cl27Tmu5gg3QUbUrQYa/y+KOHPQ==", + "dev": true + }, + "get-stream": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-4.1.0.tgz", + "integrity": "sha512-GMat4EJ5161kIy2HevLlr4luNjBgvmj413KaQA7jt4V8B4RDsfpHk7WQ9GVqfYyyx8OS/L66Kox+rJRNklLK7w==", "dev": true, "requires": { - "extend-shallow": "^2.0.1", - "is-number": "^3.0.0", - "repeat-string": "^1.6.1", - "to-regex-range": "^2.1.0" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } + "pump": "^3.0.0" } }, - "is-accessor-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-1.0.0.tgz", - "integrity": "sha512-m5hnHTkcVsPfqx3AKlyttIPb7J+XykHvJP2B9bZDjlhLIoEq4XoK64Vg7boZlVWYK6LUY94dYPEE7Lh0ZkZKcQ==", + "pump": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", "dev": true, "requires": { - "kind-of": "^6.0.0" + "end-of-stream": "^1.1.0", + "once": "^1.3.1" } + } + } + }, + "libnpmorg": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/libnpmorg/-/libnpmorg-1.0.0.tgz", + "integrity": "sha512-o+4eVJBoDGMgRwh2lJY0a8pRV2c/tQM/SxlqXezjcAg26Qe9jigYVs+Xk0vvlYDWCDhP0g74J8UwWeAgsB7gGw==", + "dev": true, + "requires": { + "aproba": "^2.0.0", + "figgy-pudding": "^3.4.1", + "get-stream": "^4.0.0", + "npm-registry-fetch": "^3.8.0" + }, + 
"dependencies": { + "aproba": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/aproba/-/aproba-2.0.0.tgz", + "integrity": "sha512-lYe4Gx7QT+MKGbDsA+Z+he/Wtef0BiwDOlK/XkBrdfsh9J/jPPXbX0tE9x9cl27Tmu5gg3QUbUrQYa/y+KOHPQ==", + "dev": true }, - "is-data-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-1.0.0.tgz", - "integrity": "sha512-jbRXy1FmtAoCjQkVmIVYwuuqDFUbaOeDjmed1tOGPrsMhtJA4rD9tkgA0F1qJ3gRFRXcHYVkdeaP50Q5rE/jLQ==", + "get-stream": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-4.1.0.tgz", + "integrity": "sha512-GMat4EJ5161kIy2HevLlr4luNjBgvmj413KaQA7jt4V8B4RDsfpHk7WQ9GVqfYyyx8OS/L66Kox+rJRNklLK7w==", "dev": true, "requires": { - "kind-of": "^6.0.0" + "pump": "^3.0.0" } }, - "is-descriptor": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-1.0.2.tgz", - "integrity": "sha512-2eis5WqQGV7peooDyLmNEPUrps9+SXX5c9pL3xEB+4e9HnGuDa7mB7kHxHw4CbqS9k1T2hOH3miL8n8WtiYVtg==", + "pump": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", "dev": true, "requires": { - "is-accessor-descriptor": "^1.0.0", - "is-data-descriptor": "^1.0.0", - "kind-of": "^6.0.2" + "end-of-stream": "^1.1.0", + "once": "^1.3.1" } - }, - "is-extglob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", - "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", + } + } + }, + "libnpmpublish": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/libnpmpublish/-/libnpmpublish-1.1.1.tgz", + "integrity": "sha512-nefbvJd/wY38zdt+b9SHL6171vqBrMtZ56Gsgfd0duEKb/pB8rDT4/ObUQLrHz1tOfht1flt2zM+UGaemzAG5g==", + "dev": true, + "requires": { + "aproba": "^2.0.0", + "figgy-pudding": "^3.5.1", + "get-stream": "^4.0.0", + "lodash.clonedeep": "^4.5.0", + "normalize-package-data": "^2.4.0", + "npm-package-arg": "^6.1.0", + "npm-registry-fetch": "^3.8.0", + "semver": "^5.5.1", + "ssri": "^6.0.1" + }, + "dependencies": { + "aproba": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/aproba/-/aproba-2.0.0.tgz", + "integrity": "sha512-lYe4Gx7QT+MKGbDsA+Z+he/Wtef0BiwDOlK/XkBrdfsh9J/jPPXbX0tE9x9cl27Tmu5gg3QUbUrQYa/y+KOHPQ==", "dev": true }, - "is-glob": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.0.tgz", - "integrity": "sha1-lSHHaEXMJhCoUgPd8ICpWML/q8A=", + "get-stream": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-4.1.0.tgz", + "integrity": "sha512-GMat4EJ5161kIy2HevLlr4luNjBgvmj413KaQA7jt4V8B4RDsfpHk7WQ9GVqfYyyx8OS/L66Kox+rJRNklLK7w==", "dev": true, "requires": { - "is-extglob": "^2.1.1" + "pump": "^3.0.0" } }, - "is-number": { + "pump": { "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-3.0.0.tgz", - "integrity": "sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", "dev": true, "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - 
"isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true - }, - "micromatch": { - "version": "3.1.10", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-3.1.10.tgz", - "integrity": "sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg==", - "dev": true, - "requires": { - "arr-diff": "^4.0.0", - "array-unique": "^0.3.2", - "braces": "^2.3.1", - "define-property": "^2.0.2", - "extend-shallow": "^3.0.2", - "extglob": "^2.0.4", - "fragment-cache": "^0.2.1", - "kind-of": "^6.0.2", - "nanomatch": "^1.2.9", - "object.pick": "^1.3.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.2" - } - }, - "ms": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz", - "integrity": "sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg==", - "dev": true - } - } - }, - "listr": { - "version": "0.14.2", - "resolved": "https://registry.npmjs.org/listr/-/listr-0.14.2.tgz", - "integrity": "sha512-vmaNJ1KlGuGWShHI35X/F8r9xxS0VTHh9GejVXwSN20fG5xpq3Jh4bJbnumoT6q5EDM/8/YP1z3YMtQbFmhuXw==", - "dev": true, - "requires": { - "@samverschueren/stream-to-observable": "^0.3.0", - "is-observable": "^1.1.0", - "is-promise": "^2.1.0", - "is-stream": "^1.1.0", - "listr-silent-renderer": "^1.1.1", - "listr-update-renderer": "^0.4.0", - "listr-verbose-renderer": "^0.4.0", - "p-map": "^1.1.1", - "rxjs": "^6.1.0" - }, - "dependencies": { - "rxjs": { - "version": "6.3.3", - "resolved": "https://registry.npmjs.org/rxjs/-/rxjs-6.3.3.tgz", - "integrity": "sha512-JTWmoY9tWCs7zvIk/CvRjhjGaOd+OVBM987mxFo+OW66cGpdKjZcpmc74ES1sB//7Kl/PAe8+wEakuhG4pcgOw==", - "dev": true, - "requires": { - "tslib": "^1.9.0" + "end-of-stream": "^1.1.0", + "once": "^1.3.1" } } } }, - "listr-silent-renderer": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/listr-silent-renderer/-/listr-silent-renderer-1.1.1.tgz", - "integrity": "sha1-kktaN1cVN3C/Go4/v3S4u/P5JC4=", - "dev": true - }, - "listr-update-renderer": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/listr-update-renderer/-/listr-update-renderer-0.4.0.tgz", - "integrity": "sha1-NE2YDaLKLosUW6MFkI8yrj9MyKc=", + "libnpmsearch": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/libnpmsearch/-/libnpmsearch-2.0.0.tgz", + "integrity": "sha512-vd+JWbTGzOSfiOc+72MU6y7WqmBXn49egCCrIXp27iE/88bX8EpG64ST1blWQI1bSMUr9l1AKPMVsqa2tS5KWA==", "dev": true, "requires": { - "chalk": "^1.1.3", - "cli-truncate": "^0.2.1", - "elegant-spinner": "^1.0.1", - "figures": "^1.7.0", - "indent-string": "^3.0.0", - "log-symbols": "^1.0.2", - "log-update": "^1.0.2", - "strip-ansi": "^3.0.1" + "figgy-pudding": "^3.5.1", + "get-stream": "^4.0.0", + "npm-registry-fetch": "^3.8.0" }, "dependencies": { - "ansi-styles": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-2.2.1.tgz", - "integrity": "sha1-tDLdM1i2NM914eRmQ2gkBTPB3b4=", - "dev": true - }, - "chalk": { - "version": "1.1.3", - "resolved": "http://registry.npmjs.org/chalk/-/chalk-1.1.3.tgz", - "integrity": "sha1-qBFcVeSnAv5NFQq9OHKCKn4J/Jg=", - "dev": true, - "requires": { - "ansi-styles": 
"^2.2.1", - "escape-string-regexp": "^1.0.2", - "has-ansi": "^2.0.0", - "strip-ansi": "^3.0.0", - "supports-color": "^2.0.0" - } - }, - "figures": { - "version": "1.7.0", - "resolved": "https://registry.npmjs.org/figures/-/figures-1.7.0.tgz", - "integrity": "sha1-y+Hjr/zxzUS4DK3+0o3Hk6lwHS4=", + "get-stream": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-4.1.0.tgz", + "integrity": "sha512-GMat4EJ5161kIy2HevLlr4luNjBgvmj413KaQA7jt4V8B4RDsfpHk7WQ9GVqfYyyx8OS/L66Kox+rJRNklLK7w==", "dev": true, "requires": { - "escape-string-regexp": "^1.0.5", - "object-assign": "^4.1.0" + "pump": "^3.0.0" } }, - "log-symbols": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/log-symbols/-/log-symbols-1.0.2.tgz", - "integrity": "sha1-N2/3tY6jCGoPCfrMdGF+ylAeGhg=", + "pump": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", "dev": true, "requires": { - "chalk": "^1.0.0" + "end-of-stream": "^1.1.0", + "once": "^1.3.1" } - }, - "supports-color": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-2.0.0.tgz", - "integrity": "sha1-U10EXOa2Nj+kARcIRimZXp3zJMc=", - "dev": true } } }, - "listr-verbose-renderer": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/listr-verbose-renderer/-/listr-verbose-renderer-0.4.1.tgz", - "integrity": "sha1-ggb0z21S3cWCfl/RSYng6WWTOjU=", + "libnpmteam": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/libnpmteam/-/libnpmteam-1.0.1.tgz", + "integrity": "sha512-gDdrflKFCX7TNwOMX1snWojCoDE5LoRWcfOC0C/fqF7mBq8Uz9zWAX4B2RllYETNO7pBupBaSyBDkTAC15cAMg==", "dev": true, "requires": { - "chalk": "^1.1.3", - "cli-cursor": "^1.0.2", - "date-fns": "^1.27.2", - "figures": "^1.7.0" + "aproba": "^2.0.0", + "figgy-pudding": "^3.4.1", + "get-stream": "^4.0.0", + "npm-registry-fetch": "^3.8.0" }, "dependencies": { - "ansi-styles": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-2.2.1.tgz", - "integrity": "sha1-tDLdM1i2NM914eRmQ2gkBTPB3b4=", + "aproba": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/aproba/-/aproba-2.0.0.tgz", + "integrity": "sha512-lYe4Gx7QT+MKGbDsA+Z+he/Wtef0BiwDOlK/XkBrdfsh9J/jPPXbX0tE9x9cl27Tmu5gg3QUbUrQYa/y+KOHPQ==", "dev": true }, - "chalk": { - "version": "1.1.3", - "resolved": "http://registry.npmjs.org/chalk/-/chalk-1.1.3.tgz", - "integrity": "sha1-qBFcVeSnAv5NFQq9OHKCKn4J/Jg=", - "dev": true, - "requires": { - "ansi-styles": "^2.2.1", - "escape-string-regexp": "^1.0.2", - "has-ansi": "^2.0.0", - "strip-ansi": "^3.0.0", - "supports-color": "^2.0.0" - } - }, - "cli-cursor": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/cli-cursor/-/cli-cursor-1.0.2.tgz", - "integrity": "sha1-ZNo/fValRBLll5S9Ytw1KV6PKYc=", - "dev": true, - "requires": { - "restore-cursor": "^1.0.1" - } - }, - "figures": { - "version": "1.7.0", - "resolved": "https://registry.npmjs.org/figures/-/figures-1.7.0.tgz", - "integrity": "sha1-y+Hjr/zxzUS4DK3+0o3Hk6lwHS4=", + "get-stream": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-4.1.0.tgz", + "integrity": "sha512-GMat4EJ5161kIy2HevLlr4luNjBgvmj413KaQA7jt4V8B4RDsfpHk7WQ9GVqfYyyx8OS/L66Kox+rJRNklLK7w==", "dev": true, "requires": { - "escape-string-regexp": "^1.0.5", - "object-assign": "^4.1.0" + "pump": "^3.0.0" } }, - "onetime": { - "version": "1.1.0", - 
"resolved": "http://registry.npmjs.org/onetime/-/onetime-1.1.0.tgz", - "integrity": "sha1-ofeDj4MUxRbwXs78vEzP4EtO14k=", - "dev": true - }, - "restore-cursor": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/restore-cursor/-/restore-cursor-1.0.1.tgz", - "integrity": "sha1-NGYfRohjJ/7SmRR5FSJS35LapUE=", + "pump": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", "dev": true, "requires": { - "exit-hook": "^1.0.0", - "onetime": "^1.0.0" + "end-of-stream": "^1.1.0", + "once": "^1.3.1" } - }, - "supports-color": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-2.0.0.tgz", - "integrity": "sha1-U10EXOa2Nj+kARcIRimZXp3zJMc=", - "dev": true } } }, + "liftoff": { + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/liftoff/-/liftoff-2.5.0.tgz", + "integrity": "sha1-IAkpG7Mc6oYbvxCnwVooyvdcMew=", + "dev": true, + "requires": { + "extend": "^3.0.0", + "findup-sync": "^2.0.0", + "fined": "^1.0.1", + "flagged-respawn": "^1.0.0", + "is-plain-object": "^2.0.4", + "object.map": "^1.0.0", + "rechoir": "^0.6.2", + "resolve": "^1.1.7" + } + }, "load-json-file": { "version": "1.1.0", - "resolved": "http://registry.npmjs.org/load-json-file/-/load-json-file-1.1.0.tgz", + "resolved": "https://registry.npmjs.org/load-json-file/-/load-json-file-1.1.0.tgz", "integrity": "sha1-lWkFcI1YtLq0wiYbBPWfMcmTdMA=", "dev": true, "requires": { @@ -9414,20 +8742,31 @@ } }, "loader-runner": { - "version": "2.3.1", - "resolved": "https://registry.npmjs.org/loader-runner/-/loader-runner-2.3.1.tgz", - "integrity": "sha512-By6ZFY7ETWOc9RFaAIb23IjJVcM4dvJC/N57nmdz9RSkMXvAXGI7SyVlAw3v8vjtDRlqThgVDVmTnr9fqMlxkw==", + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/loader-runner/-/loader-runner-2.4.0.tgz", + "integrity": "sha512-Jsmr89RcXGIwivFY21FcRrisYZfvLMTWx5kOLc+JTxtpBOG6xML0vzbc6SEQG2FO9/4Fc3wW4LVcB5DmGflaRw==", "dev": true }, "loader-utils": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/loader-utils/-/loader-utils-1.1.0.tgz", - "integrity": "sha1-yYrvSIvM7aL/teLeZG1qdUQp9c0=", + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/loader-utils/-/loader-utils-1.2.3.tgz", + "integrity": "sha512-fkpz8ejdnEMG3s37wGL07iSBDg99O9D5yflE9RGNH3hRdx9SOwYfnGYdZOUIZitN8E+E2vkq3MUMYMvPYl5ZZA==", "dev": true, "requires": { - "big.js": "^3.1.3", + "big.js": "^5.2.2", "emojis-list": "^2.0.0", - "json5": "^0.5.0" + "json5": "^1.0.1" + }, + "dependencies": { + "json5": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/json5/-/json5-1.0.1.tgz", + "integrity": "sha512-aKS4WQjPenRxiQsC93MNfjx+nbF4PAdYzmd/1JIj8HYzqfbu86beTuNgXDzPknWk0n0uARlyewZo4s++ES36Ow==", + "dev": true, + "requires": { + "minimist": "^1.2.0" + } + } } }, "locate-path": { @@ -9438,6 +8777,24 @@ "requires": { "p-locate": "^2.0.0", "path-exists": "^3.0.0" + }, + "dependencies": { + "path-exists": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-3.0.0.tgz", + "integrity": "sha1-zg6+ql94yxiSXqfYENe1mwEP1RU=", + "dev": true + } + } + }, + "lock-verify": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/lock-verify/-/lock-verify-2.0.2.tgz", + "integrity": "sha512-QNVwK0EGZBS4R3YQ7F1Ox8p41Po9VGl2QG/2GsuvTbkJZYSsPeWHKMbbH6iZMCHWSMww5nrJroZYnGzI4cePuw==", + "dev": true, + "requires": { + "npm-package-arg": "^5.1.2 || 6", + "semver": "^5.4.1" } }, 
"lodash": { @@ -9446,114 +8803,34 @@ "integrity": "sha512-cQKh8igo5QUhZ7lg38DYWAxMvjSAKG0A8wGSVimP07SIUEK2UO+arSRKbRZWtelMtN5V0Hkwh5ryOto/SshYIg==", "dev": true }, - "lodash._basecopy": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/lodash._basecopy/-/lodash._basecopy-3.0.1.tgz", - "integrity": "sha1-jaDmqHbPNEwK2KVIghEd08XHyjY=", - "dev": true - }, - "lodash._basetostring": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/lodash._basetostring/-/lodash._basetostring-3.0.1.tgz", - "integrity": "sha1-0YYdh3+CSlL2aYMtyvPuFVZqB9U=", - "dev": true - }, - "lodash._basevalues": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/lodash._basevalues/-/lodash._basevalues-3.0.0.tgz", - "integrity": "sha1-W3dXYoAr3j0yl1A+JjAIIP32Ybc=", - "dev": true - }, - "lodash._getnative": { - "version": "3.9.1", - "resolved": "https://registry.npmjs.org/lodash._getnative/-/lodash._getnative-3.9.1.tgz", - "integrity": "sha1-VwvH3t5G1hzc3mh9ZdPuy6o6r/U=", - "dev": true - }, - "lodash._isiterateecall": { - "version": "3.0.9", - "resolved": "https://registry.npmjs.org/lodash._isiterateecall/-/lodash._isiterateecall-3.0.9.tgz", - "integrity": "sha1-UgOte6Ql+uhCRg5pbbnPPmqsBXw=", - "dev": true - }, - "lodash._reescape": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/lodash._reescape/-/lodash._reescape-3.0.0.tgz", - "integrity": "sha1-Kx1vXf4HyKNVdT5fJ/rH8c3hYWo=", - "dev": true - }, - "lodash._reevaluate": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/lodash._reevaluate/-/lodash._reevaluate-3.0.0.tgz", - "integrity": "sha1-WLx0xAZklTrgsSTYBpltrKQx4u0=", - "dev": true - }, "lodash._reinterpolate": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/lodash._reinterpolate/-/lodash._reinterpolate-3.0.0.tgz", "integrity": "sha1-DM8tiRZq8Ds2Y8eWU4t1rG4RTZ0=", "dev": true }, - "lodash._root": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/lodash._root/-/lodash._root-3.0.1.tgz", - "integrity": "sha1-+6HEUkwZ7ppfgTa0YJ8BfPTe1pI=", - "dev": true - }, "lodash.camelcase": { "version": "4.3.0", "resolved": "https://registry.npmjs.org/lodash.camelcase/-/lodash.camelcase-4.3.0.tgz", "integrity": "sha1-soqmKIorn8ZRA1x3EfZathkDMaY=" }, + "lodash.clonedeep": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz", + "integrity": "sha1-4j8/nE+Pvd6HJSnBBxhXoIblzO8=", + "dev": true + }, "lodash.debounce": { "version": "4.0.8", "resolved": "https://registry.npmjs.org/lodash.debounce/-/lodash.debounce-4.0.8.tgz", "integrity": "sha1-gteb/zCmfEAF/9XiUVMArZyk168=", "dev": true }, - "lodash.escape": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/lodash.escape/-/lodash.escape-3.2.0.tgz", - "integrity": "sha1-mV7g3BjBtIzJLv+ucaEKq1tIdpg=", - "dev": true, - "requires": { - "lodash._root": "^3.0.0" - } - }, - "lodash.isarguments": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/lodash.isarguments/-/lodash.isarguments-3.1.0.tgz", - "integrity": "sha1-L1c9hcaiQon/AGY7SRwdM4/zRYo=", - "dev": true - }, - "lodash.isarray": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/lodash.isarray/-/lodash.isarray-3.0.4.tgz", - "integrity": "sha1-eeTriMNqgSKvhvhEqpvNhRtfu1U=", - "dev": true - }, - "lodash.keys": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/lodash.keys/-/lodash.keys-3.1.2.tgz", - "integrity": "sha1-TbwEcrFWvlCgsoaFXRvQsMZWCYo=", - "dev": true, - "requires": { - "lodash._getnative": "^3.0.0", - 
"lodash.isarguments": "^3.0.0", - "lodash.isarray": "^3.0.0" - } - }, "lodash.padend": { "version": "4.6.1", "resolved": "https://registry.npmjs.org/lodash.padend/-/lodash.padend-4.6.1.tgz", "integrity": "sha1-U8y6BH0G4VjTEfRdpiX05J5vFm4=" }, - "lodash.restparam": { - "version": "3.6.1", - "resolved": "https://registry.npmjs.org/lodash.restparam/-/lodash.restparam-3.6.1.tgz", - "integrity": "sha1-k2pOMJ7zMKdkXtQUWYbIWuWyCAU=", - "dev": true - }, "lodash.sortby": { "version": "4.7.0", "resolved": "https://registry.npmjs.org/lodash.sortby/-/lodash.sortby-4.7.0.tgz", @@ -9561,30 +8838,22 @@ "dev": true }, "lodash.template": { - "version": "3.6.2", - "resolved": "https://registry.npmjs.org/lodash.template/-/lodash.template-3.6.2.tgz", - "integrity": "sha1-+M3sxhaaJVvpCYrosMU9N4kx0U8=", + "version": "4.4.0", + "resolved": "https://registry.npmjs.org/lodash.template/-/lodash.template-4.4.0.tgz", + "integrity": "sha1-5zoDhcg1VZF0bgILmWecaQ5o+6A=", "dev": true, "requires": { - "lodash._basecopy": "^3.0.0", - "lodash._basetostring": "^3.0.0", - "lodash._basevalues": "^3.0.0", - "lodash._isiterateecall": "^3.0.0", - "lodash._reinterpolate": "^3.0.0", - "lodash.escape": "^3.0.0", - "lodash.keys": "^3.0.0", - "lodash.restparam": "^3.0.0", - "lodash.templatesettings": "^3.0.0" + "lodash._reinterpolate": "~3.0.0", + "lodash.templatesettings": "^4.0.0" } }, "lodash.templatesettings": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/lodash.templatesettings/-/lodash.templatesettings-3.1.1.tgz", - "integrity": "sha1-+zB4RHU7Zrnxr6VOJix0UwfbqOU=", + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/lodash.templatesettings/-/lodash.templatesettings-4.1.0.tgz", + "integrity": "sha1-K01OlbpEDZFf8IvImeRVNmZxMxY=", "dev": true, "requires": { - "lodash._reinterpolate": "^3.0.0", - "lodash.escape": "^3.0.0" + "lodash._reinterpolate": "~3.0.0" } }, "log-driver": { @@ -9593,58 +8862,6 @@ "integrity": "sha512-U7KCmLdqsGHBLeWqYlFA0V0Sl6P08EE1ZrmA9cxjUE0WVqT9qnyVDPz1kzpFEP0jdJuFnasWIfSd7fsaNXkpbg==", "dev": true }, - "log-symbols": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/log-symbols/-/log-symbols-2.2.0.tgz", - "integrity": "sha512-VeIAFslyIerEJLXHziedo2basKbMKtTw3vfn5IzG0XTjhAVEJyNHnL2p7vc+wBDSdQuUpNw3M2u6xb9QsAY5Eg==", - "dev": true, - "requires": { - "chalk": "^2.0.1" - } - }, - "log-update": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/log-update/-/log-update-1.0.2.tgz", - "integrity": "sha1-GZKfZMQJPS0ucHWh2tivWcKWuNE=", - "dev": true, - "requires": { - "ansi-escapes": "^1.0.0", - "cli-cursor": "^1.0.2" - }, - "dependencies": { - "ansi-escapes": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-1.4.0.tgz", - "integrity": "sha1-06ioOzGapneTZisT52HHkRQiMG4=", - "dev": true - }, - "cli-cursor": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/cli-cursor/-/cli-cursor-1.0.2.tgz", - "integrity": "sha1-ZNo/fValRBLll5S9Ytw1KV6PKYc=", - "dev": true, - "requires": { - "restore-cursor": "^1.0.1" - } - }, - "onetime": { - "version": "1.1.0", - "resolved": "http://registry.npmjs.org/onetime/-/onetime-1.1.0.tgz", - "integrity": "sha1-ofeDj4MUxRbwXs78vEzP4EtO14k=", - "dev": true - }, - "restore-cursor": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/restore-cursor/-/restore-cursor-1.0.1.tgz", - "integrity": "sha1-NGYfRohjJ/7SmRR5FSJS35LapUE=", - "dev": true, - "requires": { - "exit-hook": "^1.0.0", - "onetime": "^1.0.0" - } - } - } - }, "loose-envify": { "version": "1.4.0", 
"resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", @@ -9665,9 +8882,9 @@ } }, "lru-cache": { - "version": "4.1.3", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-4.1.3.tgz", - "integrity": "sha512-fFEhvcgzuIoJVUF8fYr5KR0YqxD238zgObTps31YdADwPPAp82a4M8TrckkWyx7ekNlf9aBcVn81cFwwXngrJA==", + "version": "4.1.5", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-4.1.5.tgz", + "integrity": "sha512-sWZlbEP2OsHNkXrMl5GYk/jKk70MBng6UU4YI/qGDYbgf6YbP4EvmqISbXCoJiRKs+1bSpFHVgQxvJ17F2li5g==", "dev": true, "requires": { "pseudomap": "^1.0.2", @@ -9724,14 +8941,6 @@ "dev": true, "requires": { "kind-of": "^6.0.2" - }, - "dependencies": { - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true - } } }, "makeerror": { @@ -9744,9 +8953,9 @@ } }, "map-age-cleaner": { - "version": "0.1.2", - "resolved": "https://registry.npmjs.org/map-age-cleaner/-/map-age-cleaner-0.1.2.tgz", - "integrity": "sha512-UN1dNocxQq44IhJyMI4TU8phc2m9BddacHRPRjKGLYaF0jqd3xLz0jS0skpAU9WgYyoR4gHtUpzytNBS385FWQ==", + "version": "0.1.3", + "resolved": "https://registry.npmjs.org/map-age-cleaner/-/map-age-cleaner-0.1.3.tgz", + "integrity": "sha512-bJzx6nMoP6PDLPBFmg7+xRKeFZvFboMrGlxmNj9ClvX53KrmvM5bXFXEWjbz4cz1AFn+jWJ9z/DJSz7hrs0w3w==", "dev": true, "requires": { "p-defer": "^1.0.0" @@ -9764,12 +8973,6 @@ "integrity": "sha1-plzSkIepJZi4eRJXpSPgISIqwfk=", "dev": true }, - "map-stream": { - "version": "0.0.7", - "resolved": "https://registry.npmjs.org/map-stream/-/map-stream-0.0.7.tgz", - "integrity": "sha1-ih8HiW2CsQkmvTdEokIACfiJdKg=", - "dev": true - }, "map-visit": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/map-visit/-/map-visit-1.0.0.tgz", @@ -9795,288 +8998,12 @@ "micromatch": "^3.0.4", "resolve": "^1.4.0", "stack-trace": "0.0.10" - }, - "dependencies": { - "arr-diff": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz", - "integrity": "sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=", - "dev": true - }, - "array-unique": { - "version": "0.3.2", - "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.3.2.tgz", - "integrity": "sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg=", - "dev": true - }, - "braces": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-2.3.2.tgz", - "integrity": "sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w==", - "dev": true, - "requires": { - "arr-flatten": "^1.1.0", - "array-unique": "^0.3.2", - "extend-shallow": "^2.0.1", - "fill-range": "^4.0.0", - "isobject": "^3.0.1", - "repeat-element": "^1.1.2", - "snapdragon": "^0.8.1", - "snapdragon-node": "^2.0.1", - "split-string": "^3.0.2", - "to-regex": "^3.0.1" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "expand-brackets": { - "version": "2.1.4", - "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-2.1.4.tgz", - "integrity": "sha1-t3c14xXOMPa27/D4OwQVGiJEliI=", - "dev": true, - "requires": { - "debug": "^2.3.3", - "define-property": "^0.2.5", - "extend-shallow": "^2.0.1", - "posix-character-classes": "^0.1.0", - "regex-not": "^1.0.0", - 
"snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "0.2.5", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-0.2.5.tgz", - "integrity": "sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=", - "dev": true, - "requires": { - "is-descriptor": "^0.1.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - }, - "is-accessor-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-0.1.6.tgz", - "integrity": "sha1-qeEss66Nh2cn7u84Q/igiXtcmNY=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-data-descriptor": { - "version": "0.1.4", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz", - "integrity": "sha1-C17mSDiOLIYCgueT8YVv7D8wG1Y=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-0.1.6.tgz", - "integrity": "sha512-avDYr0SB3DwO9zsMov0gKCESFYqCnE4hq/4z3TdUlukEy5t9C0YRq7HLrsN52NAcqXKaepeCD0n+B0arnVG3Hg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^0.1.6", - "is-data-descriptor": "^0.1.4", - "kind-of": "^5.0.0" - } - }, - "kind-of": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-5.1.0.tgz", - "integrity": "sha512-NGEErnH6F2vUuXDh+OlbcKW7/wOcfdRHaZ7VWtqCztfHri/++YKmP51OdWeGPuqCOba6kk2OTe5d02VmTB80Pw==", - "dev": true - } - } - }, - "extglob": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/extglob/-/extglob-2.0.4.tgz", - "integrity": "sha512-Nmb6QXkELsuBr24CJSkilo6UHHgbekK5UiZgfE6UHD3Eb27YC6oD+bhcT+tJ6cl8dmsgdQxnWlcry8ksBIBLpw==", - "dev": true, - "requires": { - "array-unique": "^0.3.2", - "define-property": "^1.0.0", - "expand-brackets": "^2.1.4", - "extend-shallow": "^2.0.1", - "fragment-cache": "^0.2.1", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-1.0.0.tgz", - "integrity": "sha1-dp66rz9KY6rTr56NMEybvnm/sOY=", - "dev": true, - "requires": { - "is-descriptor": "^1.0.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "fill-range": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-4.0.0.tgz", - "integrity": "sha1-1USBHUKPmOsGpj3EAtJAPDKMOPc=", - "dev": true, - "requires": { - "extend-shallow": "^2.0.1", - "is-number": "^3.0.0", - "repeat-string": "^1.6.1", - "to-regex-range": "^2.1.0" - }, 
- "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "is-accessor-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-1.0.0.tgz", - "integrity": "sha512-m5hnHTkcVsPfqx3AKlyttIPb7J+XykHvJP2B9bZDjlhLIoEq4XoK64Vg7boZlVWYK6LUY94dYPEE7Lh0ZkZKcQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } - }, - "is-data-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-1.0.0.tgz", - "integrity": "sha512-jbRXy1FmtAoCjQkVmIVYwuuqDFUbaOeDjmed1tOGPrsMhtJA4rD9tkgA0F1qJ3gRFRXcHYVkdeaP50Q5rE/jLQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } - }, - "is-descriptor": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-1.0.2.tgz", - "integrity": "sha512-2eis5WqQGV7peooDyLmNEPUrps9+SXX5c9pL3xEB+4e9HnGuDa7mB7kHxHw4CbqS9k1T2hOH3miL8n8WtiYVtg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^1.0.0", - "is-data-descriptor": "^1.0.0", - "kind-of": "^6.0.2" - } - }, - "is-number": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-3.0.0.tgz", - "integrity": "sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true - }, - "micromatch": { - "version": "3.1.10", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-3.1.10.tgz", - "integrity": "sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg==", - "dev": true, - "requires": { - "arr-diff": "^4.0.0", - "array-unique": "^0.3.2", - "braces": "^2.3.1", - "define-property": "^2.0.2", - "extend-shallow": "^3.0.2", - "extglob": "^2.0.4", - "fragment-cache": "^0.2.1", - "kind-of": "^6.0.2", - "nanomatch": "^1.2.9", - "object.pick": "^1.3.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.2" - } - } } }, "math-random": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/math-random/-/math-random-1.0.1.tgz", - "integrity": "sha1-izqsWIuKZuSXXjzepn97sylgH6w=", + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/math-random/-/math-random-1.0.4.tgz", + "integrity": "sha512-rUxjysqif/BZQH2yhd5Aaq7vXMSx9NdEsQcyA07uEzIvxgI7zIr33gGsh+RU0/XjmQpCW7RsVof1vlkvQVCK5A==", "dev": true }, "md5.js": { @@ -10099,6 +9026,16 @@ "mimic-fn": "^1.0.0" } }, + "memfs": { + "version": "2.15.0", + "resolved": "https://registry.npmjs.org/memfs/-/memfs-2.15.0.tgz", + "integrity": "sha512-vktLqfHB1K4I9oiWlG4VjbztEreU5LqgnTnlVimr4bKNhJwjTmKg5+eYIimmNiKVUolTUrWSy2k/KEyqqLqZrQ==", + "dev": true, + "requires": { + "fast-extend": 
"0.0.2", + "fs-monkey": "^0.3.3" + } + }, "memoizee": { "version": "0.4.14", "resolved": "https://registry.npmjs.org/memoizee/-/memoizee-0.4.14.tgz", @@ -10148,6 +9085,15 @@ "trim-newlines": "^2.0.0" }, "dependencies": { + "find-up": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-2.1.0.tgz", + "integrity": "sha1-RdG35QbHF93UgndaK3eSCjwMV6c=", + "dev": true, + "requires": { + "locate-path": "^2.0.0" + } + }, "load-json-file": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/load-json-file/-/load-json-file-4.0.0.tgz", @@ -10209,9 +9155,9 @@ } }, "merge": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/merge/-/merge-1.2.0.tgz", - "integrity": "sha1-dTHjnUlJwoGma4xabgJl6LBYlNo=", + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/merge/-/merge-1.2.1.tgz", + "integrity": "sha512-VjFo4P5Whtj4vsLzsYBu5ayHhoHJ0UqNm7ibvShmbmoz7tGi0vXaoJbGdB+GmDMLUdg8DpQXEIeVDAe8MaABvQ==", "dev": true }, "merge-stream": { @@ -10230,24 +9176,24 @@ "dev": true }, "micromatch": { - "version": "2.3.11", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-2.3.11.tgz", - "integrity": "sha1-hmd8l9FyCzY0MdBNDRUpO9OMFWU=", + "version": "3.1.10", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-3.1.10.tgz", + "integrity": "sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg==", "dev": true, "requires": { - "arr-diff": "^2.0.0", - "array-unique": "^0.2.1", - "braces": "^1.8.2", - "expand-brackets": "^0.1.4", - "extglob": "^0.3.1", - "filename-regex": "^2.0.0", - "is-extglob": "^1.0.0", - "is-glob": "^2.0.1", - "kind-of": "^3.0.2", - "normalize-path": "^2.0.1", - "object.omit": "^2.0.0", - "parse-glob": "^3.0.4", - "regex-cache": "^0.4.2" + "arr-diff": "^4.0.0", + "array-unique": "^0.3.2", + "braces": "^2.3.1", + "define-property": "^2.0.2", + "extend-shallow": "^3.0.2", + "extglob": "^2.0.4", + "fragment-cache": "^0.2.1", + "kind-of": "^6.0.2", + "nanomatch": "^1.2.9", + "object.pick": "^1.3.0", + "regex-not": "^1.0.0", + "snapdragon": "^0.8.1", + "to-regex": "^3.0.2" } }, "miller-rabin": { @@ -10261,18 +9207,18 @@ } }, "mime-db": { - "version": "1.36.0", - "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.36.0.tgz", - "integrity": "sha512-L+xvyD9MkoYMXb1jAmzI/lWYAxAMCPvIBSWur0PZ5nOf5euahRLVqH//FKW9mWp2lkqUgYiXPgkzfMUFi4zVDw==", + "version": "1.37.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.37.0.tgz", + "integrity": "sha512-R3C4db6bgQhlIhPU48fUtdVmKnflq+hRdad7IyKhtFj06VPNVdk2RhiYL3UjQIlso8L+YxAtFkobT0VK+S/ybg==", "dev": true }, "mime-types": { - "version": "2.1.20", - "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.20.tgz", - "integrity": "sha512-HrkrPaP9vGuWbLK1B1FfgAkbqNjIuy4eHlIYnFi7kamZyLLrGlo2mpcx0bBmNpKqBtYtAfGbodDddIgddSJC2A==", + "version": "2.1.21", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.21.tgz", + "integrity": "sha512-3iL6DbwpyLzjR3xHSFNFeb9Nz/M8WDkX33t1GFQnFOllWk8pOrh/LSrB5OXlnlW5P9LH73X6loW/eogc+F5lJg==", "dev": true, "requires": { - "mime-db": "~1.36.0" + "mime-db": "~1.37.0" } }, "mimic-fn": { @@ -10304,7 +9250,7 @@ }, "minimist": { "version": "1.2.0", - "resolved": "http://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz", "integrity": "sha1-o1AIsg9BOD7sH7kU9M1d95omQoQ=", "dev": true }, @@ -10319,9 +9265,9 @@ } }, "minipass": { - "version": "2.3.4", - "resolved": 
"https://registry.npmjs.org/minipass/-/minipass-2.3.4.tgz", - "integrity": "sha512-mlouk1OHlaUE8Odt1drMtG1bAJA4ZA6B/ehysgV0LUIrDHdKgo1KorZq3pK0b/7Z7LJIQ12MNM6aC+Tn6lUZ5w==", + "version": "2.3.5", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-2.3.5.tgz", + "integrity": "sha512-Gi1W4k059gyRbyVUZQ4mEqLm0YIUiGYfvxhF6SIlk3ui1WVxMTGfGdQ2SInh3PDrRTVvPKgULkpJtT4RH10+VA==", "dev": true, "requires": { "safe-buffer": "^5.1.2", @@ -10329,17 +9275,17 @@ }, "dependencies": { "yallist": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.0.2.tgz", - "integrity": "sha1-hFK0u36Dx8GI2AQcGoN8dz1ti7k=", + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.0.3.tgz", + "integrity": "sha512-S+Zk8DEWE6oKpV+vI3qWkaK+jSbIK86pCwe2IF/xwIpQ8jEuxpw9NyaGjmp9+BoJv5FV2piqCDcoCtStppiq2A==", "dev": true } } }, "minizlib": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-1.1.1.tgz", - "integrity": "sha512-TrfjCjk4jLhcJyGMYymBH6oTXcWjYbUAXTHDbtnWHjZC25h0cdajHuPE1zxb4DVmu8crfh+HwH/WMuyLG0nHBg==", + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-1.2.1.tgz", + "integrity": "sha512-7+4oTUOWKg7AuL3vloEWekXY2/D20cevzsrNT2kGWm+39J9hGTCBv8VI5Pm5lXZ/o3/mdR4f8rflAPhnQb8mPA==", "dev": true, "requires": { "minipass": "^2.2.1" @@ -10361,6 +9307,18 @@ "pumpify": "^1.3.3", "stream-each": "^1.1.0", "through2": "^2.0.0" + }, + "dependencies": { + "pump": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", + "dev": true, + "requires": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } + } } }, "mixin-deep": { @@ -10386,7 +9344,7 @@ }, "mkdirp": { "version": "0.5.1", - "resolved": "http://registry.npmjs.org/mkdirp/-/mkdirp-0.5.1.tgz", + "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.1.tgz", "integrity": "sha1-MAV0OOrGz3+MR2fzhkjWaX11yQM=", "dev": true, "requires": { @@ -10395,7 +9353,7 @@ "dependencies": { "minimist": { "version": "0.0.8", - "resolved": "http://registry.npmjs.org/minimist/-/minimist-0.0.8.tgz", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-0.0.8.tgz", "integrity": "sha1-hX/Kv8M5fSYluCKCYuhqp6ARsF0=", "dev": true } @@ -10464,13 +9422,14 @@ "minimatch": "^3.0.0" } }, - "multipipe": { - "version": "0.1.2", - "resolved": "https://registry.npmjs.org/multipipe/-/multipipe-0.1.2.tgz", - "integrity": "sha1-Ko8t33Du1WTf8tV/HhoTfZ8FB4s=", + "multistream": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/multistream/-/multistream-2.1.1.tgz", + "integrity": "sha512-xasv76hl6nr1dEy3lPvy7Ej7K/Lx3O/FCvwge8PeVJpciPPoNCbaANcNiBug3IpdvTveZUcAV0DJzdnUDMesNQ==", "dev": true, "requires": { - "duplexer2": "0.0.2" + "inherits": "^2.0.1", + "readable-stream": "^2.0.5" } }, "mute-stdout": { @@ -10486,9 +9445,9 @@ "dev": true }, "nan": { - "version": "2.11.1", - "resolved": "https://registry.npmjs.org/nan/-/nan-2.11.1.tgz", - "integrity": "sha512-iji6k87OSXa0CcrLl9z+ZiYSuR2o+c0bGuNmXdrhTQTakxytAFsC56SArGYoiHlJlFoHSnvmhpceZJaXkVuOtA==", + "version": "2.12.1", + "resolved": "https://registry.npmjs.org/nan/-/nan-2.12.1.tgz", + "integrity": "sha512-JY7V6lRkStKcKTvHO5NVSQRv+RV+FIL5pvDoLiAtSL9pKlC5x9PKQcZDsq7m4FO4d57mkhC6Z+QhAh3Jdk5JFw==", "dev": true, "optional": true }, @@ -10509,26 +9468,6 @@ "regex-not": "^1.0.0", "snapdragon": "^0.8.1", "to-regex": "^3.0.1" - }, - "dependencies": { - "arr-diff": { - 
"version": "4.0.0", - "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz", - "integrity": "sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=", - "dev": true - }, - "array-unique": { - "version": "0.3.2", - "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.3.2.tgz", - "integrity": "sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg=", - "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true - } } }, "natural-compare": { @@ -10588,7 +9527,7 @@ "dependencies": { "semver": { "version": "5.3.0", - "resolved": "http://registry.npmjs.org/semver/-/semver-5.3.0.tgz", + "resolved": "https://registry.npmjs.org/semver/-/semver-5.3.0.tgz", "integrity": "sha1-myzl094C0XxgEq0yaqa00M9U+U8=", "dev": true } @@ -10601,9 +9540,9 @@ "dev": true }, "node-libs-browser": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/node-libs-browser/-/node-libs-browser-2.1.0.tgz", - "integrity": "sha512-5AzFzdoIMb89hBGMZglEegffzgRg+ZFoUmisQ8HI4j1KDdpx13J0taNp2y9xPbur6W61gepGDDotGBVQ7mfUCg==", + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/node-libs-browser/-/node-libs-browser-2.2.0.tgz", + "integrity": "sha512-5MQunG/oyOaBdttrL40dA7bUfPORLRWMUJLQtMg7nluxUvk5XwnLdL9twQHFAjRx/y7mIMkLKT9++qPbbk6BZA==", "dev": true, "requires": { "assert": "^1.1.1", @@ -10613,7 +9552,7 @@ "constants-browserify": "^1.0.0", "crypto-browserify": "^3.11.0", "domain-browser": "^1.1.1", - "events": "^1.0.0", + "events": "^3.0.0", "https-browserify": "^1.0.0", "os-browserify": "^0.3.0", "path-browserify": "0.0.0", @@ -10627,18 +9566,26 @@ "timers-browserify": "^2.0.4", "tty-browserify": "0.0.0", "url": "^0.11.0", - "util": "^0.10.3", + "util": "^0.11.0", "vm-browserify": "0.0.4" + }, + "dependencies": { + "punycode": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-1.4.1.tgz", + "integrity": "sha1-wNWmOycYgArY4esPpSachN1BhF4=", + "dev": true + } } }, "node-notifier": { - "version": "5.2.1", - "resolved": "https://registry.npmjs.org/node-notifier/-/node-notifier-5.2.1.tgz", - "integrity": "sha512-MIBs+AAd6dJ2SklbbE8RUDRlIVhU8MaNLh1A9SUZDUHPiZkWLFde6UNwG41yQHZEToHgJMXqyVZ9UcS/ReOVTg==", + "version": "5.3.0", + "resolved": "https://registry.npmjs.org/node-notifier/-/node-notifier-5.3.0.tgz", + "integrity": "sha512-AhENzCSGZnZJgBARsUjnQ7DnZbzyP+HxlVXuD0xqAnvL8q+OqtSX7lGg9e8nHzwXkMMXNdVeqq4E2M3EUAqX6Q==", "dev": true, "requires": { "growly": "^1.3.0", - "semver": "^5.4.1", + "semver": "^5.5.0", "shellwords": "^0.1.1", "which": "^1.3.0" } @@ -10712,6 +9659,12 @@ } } }, + "npm-logical-tree": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/npm-logical-tree/-/npm-logical-tree-1.2.1.tgz", + "integrity": "sha512-AJI/qxDB2PWI4LG1CYN579AY1vCiNyWfkiquCsJWqntRu/WwimVrC8yXeILBFHDwxfOejxewlmnvW9XXjMlYIg==", + "dev": true + }, "npm-package-arg": { "version": "6.1.0", "resolved": "https://registry.npmjs.org/npm-package-arg/-/npm-package-arg-6.1.0.tgz", @@ -10725,38 +9678,41 @@ } }, "npm-packlist": { - "version": "1.1.12", - "resolved": "https://registry.npmjs.org/npm-packlist/-/npm-packlist-1.1.12.tgz", - "integrity": "sha512-WJKFOVMeAlsU/pjXuqVdzU0WfgtIBCupkEVwn+1Y0ERAbUfWw8R4GjgVbaKnUjRoD2FoQbHOCbOyT5Mbs9Lw4g==", + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/npm-packlist/-/npm-packlist-1.2.0.tgz", + "integrity": 
"sha512-7Mni4Z8Xkx0/oegoqlcao/JpPCPEMtUvsmB0q7mgvlMinykJLSRTYuFqoQLYgGY8biuxIeiHO+QNJKbCfljewQ==", "dev": true, "requires": { "ignore-walk": "^3.0.1", "npm-bundled": "^1.0.1" } }, - "npm-path": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/npm-path/-/npm-path-2.0.4.tgz", - "integrity": "sha512-IFsj0R9C7ZdR5cP+ET342q77uSRdtWOlWpih5eC+lu29tIDbNEgDbzgVJ5UFvYHWhxDZ5TFkJafFioO0pPQjCw==", + "npm-pick-manifest": { + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/npm-pick-manifest/-/npm-pick-manifest-2.2.3.tgz", + "integrity": "sha512-+IluBC5K201+gRU85vFlUwX3PFShZAbAgDNp2ewJdWMVSppdo/Zih0ul2Ecky/X7b51J7LrrUAP+XOmOCvYZqA==", "dev": true, "requires": { - "which": "^1.2.10" + "figgy-pudding": "^3.5.1", + "npm-package-arg": "^6.0.0", + "semver": "^5.4.1" } }, - "npm-pick-manifest": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/npm-pick-manifest/-/npm-pick-manifest-2.1.0.tgz", - "integrity": "sha512-q9zLP8cTr8xKPmMZN3naxp1k/NxVFsjxN6uWuO1tiw9gxg7wZWQ/b5UTfzD0ANw2q1lQxdLKTeCCksq+bPSgbQ==", + "npm-profile": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/npm-profile/-/npm-profile-4.0.1.tgz", + "integrity": "sha512-NQ1I/1Q7YRtHZXkcuU1/IyHeLy6pd+ScKg4+DQHdfsm769TGq6HPrkbuNJVJS4zwE+0mvvmeULzQdWn2L2EsVA==", "dev": true, "requires": { - "npm-package-arg": "^6.0.0", - "semver": "^5.4.1" + "aproba": "^1.1.2 || 2", + "figgy-pudding": "^3.4.1", + "npm-registry-fetch": "^3.8.0" } }, "npm-registry-fetch": { - "version": "3.8.0", - "resolved": "https://registry.npmjs.org/npm-registry-fetch/-/npm-registry-fetch-3.8.0.tgz", - "integrity": "sha512-hrw8UMD+Nob3Kl3h8Z/YjmKamb1gf7D1ZZch2otrIXM3uFLB5vjEY6DhMlq80z/zZet6eETLbOXcuQudCB3Zpw==", + "version": "3.9.0", + "resolved": "https://registry.npmjs.org/npm-registry-fetch/-/npm-registry-fetch-3.9.0.tgz", + "integrity": "sha512-srwmt8YhNajAoSAaDWndmZgx89lJwIZ1GWxOuckH4Coek4uHv5S+o/l9FLQe/awA+JwTnj4FJHldxhlXdZEBmw==", "dev": true, "requires": { "JSONStream": "^1.3.4", @@ -10768,17 +9724,17 @@ } }, "npm-run-all": { - "version": "4.1.3", - "resolved": "https://registry.npmjs.org/npm-run-all/-/npm-run-all-4.1.3.tgz", - "integrity": "sha512-aOG0N3Eo/WW+q6sUIdzcV2COS8VnTZCmdji0VQIAZF3b+a3YWb0AD0vFIyjKec18A7beLGbaQ5jFTNI2bPt9Cg==", + "version": "4.1.5", + "resolved": "https://registry.npmjs.org/npm-run-all/-/npm-run-all-4.1.5.tgz", + "integrity": "sha512-Oo82gJDAVcaMdi3nuoKFavkIHBRVqQ1qvMb+9LHk/cF4P6B2m8aP04hGf7oL6wZ9BuGwX1onlLhpuoofSyoQDQ==", "dev": true, "requires": { - "ansi-styles": "^3.2.0", - "chalk": "^2.1.0", - "cross-spawn": "^6.0.4", + "ansi-styles": "^3.2.1", + "chalk": "^2.4.1", + "cross-spawn": "^6.0.5", "memorystream": "^0.3.1", "minimatch": "^3.0.4", - "ps-tree": "^1.1.0", + "pidtree": "^0.3.0", "read-pkg": "^3.0.0", "shell-quote": "^1.6.1", "string.prototype.padend": "^3.0.0" @@ -10856,17 +9812,6 @@ "path-key": "^2.0.0" } }, - "npm-which": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/npm-which/-/npm-which-3.0.1.tgz", - "integrity": "sha1-kiXybsOihcIJyuZ8OxGmtKtxQKo=", - "dev": true, - "requires": { - "commander": "^2.9.0", - "npm-path": "^2.0.2", - "which": "^1.2.10" - } - }, "npmlog": { "version": "4.1.2", "resolved": "https://registry.npmjs.org/npmlog/-/npmlog-4.1.2.tgz", @@ -10922,6 +9867,15 @@ "requires": { "is-descriptor": "^0.1.0" } + }, + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } } } }, @@ 
-10938,14 +9892,6 @@ "dev": true, "requires": { "isobject": "^3.0.0" - }, - "dependencies": { - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - } } }, "object.assign": { @@ -10970,23 +9916,6 @@ "array-slice": "^1.0.0", "for-own": "^1.0.0", "isobject": "^3.0.0" - }, - "dependencies": { - "for-own": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/for-own/-/for-own-1.0.0.tgz", - "integrity": "sha1-xjMy9BXO3EsE2/5wz4NklMU8tEs=", - "dev": true, - "requires": { - "for-in": "^1.0.1" - } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - } } }, "object.getownpropertydescriptors": { @@ -11007,17 +9936,6 @@ "requires": { "for-own": "^1.0.0", "make-iterator": "^1.0.0" - }, - "dependencies": { - "for-own": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/for-own/-/for-own-1.0.0.tgz", - "integrity": "sha1-xjMy9BXO3EsE2/5wz4NklMU8tEs=", - "dev": true, - "requires": { - "for-in": "^1.0.1" - } - } } }, "object.omit": { @@ -11028,6 +9946,17 @@ "requires": { "for-own": "^0.1.4", "is-extendable": "^0.1.1" + }, + "dependencies": { + "for-own": { + "version": "0.1.5", + "resolved": "https://registry.npmjs.org/for-own/-/for-own-0.1.5.tgz", + "integrity": "sha1-UmXGgaTylNq78XyVCbZ2OqhFEM4=", + "dev": true, + "requires": { + "for-in": "^1.0.1" + } + } } }, "object.pick": { @@ -11037,14 +9966,6 @@ "dev": true, "requires": { "isobject": "^3.0.1" - }, - "dependencies": { - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - } } }, "object.reduce": { @@ -11055,17 +9976,6 @@ "requires": { "for-own": "^1.0.0", "make-iterator": "^1.0.0" - }, - "dependencies": { - "for-own": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/for-own/-/for-own-1.0.0.tgz", - "integrity": "sha1-xjMy9BXO3EsE2/5wz4NklMU8tEs=", - "dev": true, - "requires": { - "for-in": "^1.0.1" - } - } } }, "once": { @@ -11098,7 +10008,7 @@ "dependencies": { "minimist": { "version": "0.0.10", - "resolved": "http://registry.npmjs.org/minimist/-/minimist-0.0.10.tgz", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-0.0.10.tgz", "integrity": "sha1-3j+YVD2/lggr5IrRoMfNqDYwHc8=", "dev": true } @@ -11149,7 +10059,7 @@ }, "os-locale": { "version": "1.4.0", - "resolved": "http://registry.npmjs.org/os-locale/-/os-locale-1.4.0.tgz", + "resolved": "https://registry.npmjs.org/os-locale/-/os-locale-1.4.0.tgz", "integrity": "sha1-IPnxeuKe00XoveWDsT0gCYA8FNk=", "dev": true, "requires": { @@ -11186,7 +10096,7 @@ }, "p-is-promise": { "version": "1.1.0", - "resolved": "http://registry.npmjs.org/p-is-promise/-/p-is-promise-1.1.0.tgz", + "resolved": "https://registry.npmjs.org/p-is-promise/-/p-is-promise-1.1.0.tgz", "integrity": "sha1-nJRWmJ6fZYgBewQ01WCXZ1w9oF4=", "dev": true }, @@ -11251,67 +10161,103 @@ } }, "pacote": { - "version": "9.1.0", - "resolved": "https://registry.npmjs.org/pacote/-/pacote-9.1.0.tgz", - "integrity": "sha512-AFXaSWhOtQf3jHqEvg+ZYH/dfT8TKq6TKspJ4qEFwVVuh5aGvMIk6SNF8vqfzz+cBceDIs9drOcpBbrPai7i+g==", + "version": "9.4.1", + "resolved": "https://registry.npmjs.org/pacote/-/pacote-9.4.1.tgz", + "integrity": "sha512-YKSRsQqmeHxgra0KCdWA2FtVxDPUlBiCdmew+mSe44pzlx5t1ViRMWiQg18T+DREA+vSqYfKzynaToFR4hcKHw==", "dev": 
true, "requires": { - "bluebird": "^3.5.1", - "cacache": "^11.0.2", - "figgy-pudding": "^3.2.1", - "get-stream": "^3.0.0", - "glob": "^7.1.2", - "lru-cache": "^4.1.3", + "bluebird": "^3.5.3", + "cacache": "^11.3.2", + "figgy-pudding": "^3.5.1", + "get-stream": "^4.1.0", + "glob": "^7.1.3", + "lru-cache": "^5.1.1", "make-fetch-happen": "^4.0.1", "minimatch": "^3.0.4", - "minipass": "^2.3.3", + "minipass": "^2.3.5", "mississippi": "^3.0.0", "mkdirp": "^0.5.1", "normalize-package-data": "^2.4.0", "npm-package-arg": "^6.1.0", - "npm-packlist": "^1.1.10", - "npm-pick-manifest": "^2.1.0", - "npm-registry-fetch": "^3.0.0", + "npm-packlist": "^1.1.12", + "npm-pick-manifest": "^2.2.3", + "npm-registry-fetch": "^3.8.0", "osenv": "^0.1.5", "promise-inflight": "^1.0.1", "promise-retry": "^1.1.1", - "protoduck": "^5.0.0", + "protoduck": "^5.0.1", "rimraf": "^2.6.2", "safe-buffer": "^5.1.2", - "semver": "^5.5.0", - "ssri": "^6.0.0", - "tar": "^4.4.3", - "unique-filename": "^1.1.0", - "which": "^1.3.0" + "semver": "^5.6.0", + "ssri": "^6.0.1", + "tar": "^4.4.8", + "unique-filename": "^1.1.1", + "which": "^1.3.1" }, "dependencies": { + "get-stream": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-4.1.0.tgz", + "integrity": "sha512-GMat4EJ5161kIy2HevLlr4luNjBgvmj413KaQA7jt4V8B4RDsfpHk7WQ9GVqfYyyx8OS/L66Kox+rJRNklLK7w==", + "dev": true, + "requires": { + "pump": "^3.0.0" + } + }, + "lru-cache": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz", + "integrity": "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==", + "dev": true, + "requires": { + "yallist": "^3.0.2" + } + }, + "pump": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", + "dev": true, + "requires": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } + }, "tar": { - "version": "4.4.6", - "resolved": "https://registry.npmjs.org/tar/-/tar-4.4.6.tgz", - "integrity": "sha512-tMkTnh9EdzxyfW+6GK6fCahagXsnYk6kE6S9Gr9pjVdys769+laCTbodXDhPAjzVtEBazRgP0gYqOjnk9dQzLg==", + "version": "4.4.8", + "resolved": "https://registry.npmjs.org/tar/-/tar-4.4.8.tgz", + "integrity": "sha512-LzHF64s5chPQQS0IYBn9IN5h3i98c12bo4NCO7e0sGM2llXQ3p2FGC5sdENN4cTW48O915Sh+x+EXx7XW96xYQ==", "dev": true, "requires": { - "chownr": "^1.0.1", + "chownr": "^1.1.1", "fs-minipass": "^1.2.5", - "minipass": "^2.3.3", - "minizlib": "^1.1.0", + "minipass": "^2.3.4", + "minizlib": "^1.1.1", "mkdirp": "^0.5.0", "safe-buffer": "^5.1.2", "yallist": "^3.0.2" } }, "yallist": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.0.2.tgz", - "integrity": "sha1-hFK0u36Dx8GI2AQcGoN8dz1ti7k=", + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.0.3.tgz", + "integrity": "sha512-S+Zk8DEWE6oKpV+vI3qWkaK+jSbIK86pCwe2IF/xwIpQ8jEuxpw9NyaGjmp9+BoJv5FV2piqCDcoCtStppiq2A==", "dev": true } } }, + "pad-left": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/pad-left/-/pad-left-2.1.0.tgz", + "integrity": "sha1-FuajstRKjhOMsIOMx8tAOk/J6ZQ=", + "requires": { + "repeat-string": "^1.5.4" + } + }, "pako": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.6.tgz", - "integrity": "sha512-lQe48YPsMJAig+yngZ87Lus+NF+3mtu7DVOBu6b/gHO1YpKwIj5AWjZ/TOS7i46HD/UixzWb1zeWDZfGZ3iYcg==", + "version": "1.0.8", + "resolved": 
"https://registry.npmjs.org/pako/-/pako-1.0.8.tgz", + "integrity": "sha512-6i0HVbUfcKaTv+EG8ZTr75az7GFXcLYk9UyLEg7Notv/Ma+z/UG3TCoz6GiNeOrn1E/e63I0X/Hpw18jHOTUnA==", "dev": true }, "parallel-transform": { @@ -11326,16 +10272,17 @@ } }, "parse-asn1": { - "version": "5.1.1", - "resolved": "http://registry.npmjs.org/parse-asn1/-/parse-asn1-5.1.1.tgz", - "integrity": "sha512-KPx7flKXg775zZpnp9SxJlz00gTd4BmJ2yJufSc44gMCRrRQ7NSzAcSJQfifuOLgW6bEi+ftrALtsgALeB2Adw==", + "version": "5.1.3", + "resolved": "https://registry.npmjs.org/parse-asn1/-/parse-asn1-5.1.3.tgz", + "integrity": "sha512-VrPoetlz7B/FqjBLD2f5wBVZvsZVLnRUrxVLfRYhGXCODa/NWE4p3Wp+6+aV3ZPL3KM7/OZmxDIwwijD7yuucg==", "dev": true, "requires": { "asn1.js": "^4.0.0", "browserify-aes": "^1.0.0", "create-hash": "^1.1.0", "evp_bytestokey": "^1.0.0", - "pbkdf2": "^3.0.3" + "pbkdf2": "^3.0.3", + "safe-buffer": "^5.1.1" } }, "parse-filepath": { @@ -11365,6 +10312,23 @@ "is-dotfile": "^1.0.0", "is-extglob": "^1.0.0", "is-glob": "^2.0.0" + }, + "dependencies": { + "is-extglob": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-1.0.0.tgz", + "integrity": "sha1-rEaBd8SUNAWgkvyPKXYMb/xiBsA=", + "dev": true + }, + "is-glob": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-2.0.1.tgz", + "integrity": "sha1-0Jb5JqPe1WAPP9/ZEZjLCIjC2GM=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + } } }, "parse-json": { @@ -11376,6 +10340,12 @@ "error-ex": "^1.2.0" } }, + "parse-node-version": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/parse-node-version/-/parse-node-version-1.0.0.tgz", + "integrity": "sha512-02GTVHD1u0nWc20n2G7WX/PgdhNFG04j5fi1OkaJzPWLTcf6vh6229Lta1wTmXG/7Dg42tCssgkccVt7qvd8Kg==", + "dev": true + }, "parse-passwd": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/parse-passwd/-/parse-passwd-1.0.0.tgz", @@ -11407,10 +10377,13 @@ "dev": true }, "path-exists": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-3.0.0.tgz", - "integrity": "sha1-zg6+ql94yxiSXqfYENe1mwEP1RU=", - "dev": true + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-2.1.0.tgz", + "integrity": "sha1-D+tsZPD8UY2adU3V77YscCJ2H0s=", + "dev": true, + "requires": { + "pinkie-promise": "^2.0.0" + } }, "path-is-absolute": { "version": "1.0.1", @@ -11470,15 +10443,6 @@ } } }, - "pause-stream": { - "version": "0.0.11", - "resolved": "http://registry.npmjs.org/pause-stream/-/pause-stream-0.0.11.tgz", - "integrity": "sha1-/lo0sMvOErWqaitAPuLnO2AvFEU=", - "dev": true, - "requires": { - "through": "~2.3" - } - }, "pbkdf2": { "version": "3.0.17", "resolved": "https://registry.npmjs.org/pbkdf2/-/pbkdf2-3.0.17.tgz", @@ -11498,6 +10462,12 @@ "integrity": "sha1-Ywn04OX6kT7BxpMHrjZLSzd8nns=", "dev": true }, + "pidtree": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/pidtree/-/pidtree-0.3.0.tgz", + "integrity": "sha512-9CT4NFlDcosssyg8KVFltgokyKZIFjoBxw8CTGy+5F38Y1eQWrt8tRayiUOXE+zVKQnYu5BR8JjCtvK3BcnBhg==", + "dev": true + }, "pify": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/pify/-/pify-3.0.0.tgz", @@ -11526,6 +10496,17 @@ "dev": true, "requires": { "find-up": "^2.1.0" + }, + "dependencies": { + "find-up": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-2.1.0.tgz", + "integrity": "sha1-RdG35QbHF93UgndaK3eSCjwMV6c=", + "dev": true, + "requires": { + "locate-path": "^2.0.0" + } + } } }, "platform": { @@ -11534,15 +10515,6 @@ 
"integrity": "sha512-TuvHS8AOIZNAlE77WUDiR4rySV/VMptyMfcfeoMgs4P8apaZM3JrnbzBiixKUv+XR6i+BXrQh8WAnjaSPFO65Q==", "dev": true }, - "please-upgrade-node": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/please-upgrade-node/-/please-upgrade-node-3.1.1.tgz", - "integrity": "sha512-KY1uHnQ2NlQHqIJQpnh/i54rKkuxCEBx+voJIS/Mvb+L2iYd2NMotwduhKTMjfC1uKoX3VXOxLjIYG66dfJTVQ==", - "dev": true, - "requires": { - "semver-compare": "^1.0.0" - } - }, "plugin-error": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/plugin-error/-/plugin-error-1.0.1.tgz", @@ -11553,23 +10525,6 @@ "arr-diff": "^4.0.0", "arr-union": "^3.1.0", "extend-shallow": "^3.0.2" - }, - "dependencies": { - "ansi-colors": { - "version": "1.1.0", - "resolved": "http://registry.npmjs.org/ansi-colors/-/ansi-colors-1.1.0.tgz", - "integrity": "sha512-SFKX67auSNoVR38N3L+nvsPjOE0bybKTYbkf5tRvushrAPQ9V75huw0ZxBkKVeRU9kqH3d6HA4xTckbwZ4ixmA==", - "dev": true, - "requires": { - "ansi-wrap": "^0.1.0" - } - }, - "arr-diff": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz", - "integrity": "sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=", - "dev": true - } } }, "pn": { @@ -11616,7 +10571,7 @@ }, "pretty-hrtime": { "version": "1.0.3", - "resolved": "http://registry.npmjs.org/pretty-hrtime/-/pretty-hrtime-1.0.3.tgz", + "resolved": "https://registry.npmjs.org/pretty-hrtime/-/pretty-hrtime-1.0.3.tgz", "integrity": "sha1-t+PqQkNaTJsnWdmeDyAesZWALuE=", "dev": true }, @@ -11633,15 +10588,15 @@ "dev": true }, "process-nextick-args": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.0.tgz", - "integrity": "sha512-MtEC1TqN0EU5nephaJ4rAtThHtC86dNN9qCuEhtshvpVBkAW5ZO7BASN9REnF9eoXGcRub+pFuKEpOHE+HbEMw==", + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-1.0.7.tgz", + "integrity": "sha1-FQ4gt1ZZCtP5EJPyWk8q2L/zC6M=", "dev": true }, "progress": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.1.tgz", - "integrity": "sha512-OE+a6vzqazc+K6LxJrX5UPyKFvGnL5CYmq2jFGNIBWHpc4QyE49/YOumcrpQFJpfejmvRtbJzgO1zPmMCqlbBg==", + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", + "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==", "dev": true }, "promise": { @@ -11695,12 +10650,12 @@ "dev": true }, "protoduck": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/protoduck/-/protoduck-5.0.0.tgz", - "integrity": "sha512-agsGWD8/RZrS4ga6v82Fxb0RHIS2RZnbsSue6A9/MBRhB/jcqOANAMNrqM9900b8duj+Gx+T/JMy5IowDoO/hQ==", + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/protoduck/-/protoduck-5.0.1.tgz", + "integrity": "sha512-WxoCeDCoCBY55BMvj4cAEjdVUFGRWed9ZxPlqTKYyw1nDDTQ4pqmnIMAGfJlg7Dx35uB/M+PHJPTmGOvaCaPTg==", "dev": true, "requires": { - "genfun": "^4.0.1" + "genfun": "^5.0.0" } }, "prr": { @@ -11709,15 +10664,6 @@ "integrity": "sha1-0/wRS6BplaRexok/SEzrHXj19HY=", "dev": true }, - "ps-tree": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/ps-tree/-/ps-tree-1.1.0.tgz", - "integrity": "sha1-tCGyQUDWID8e08dplrRCewjowBQ=", - "dev": true, - "requires": { - "event-stream": "~3.3.0" - } - }, "pseudomap": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/pseudomap/-/pseudomap-1.0.2.tgz", @@ -11725,9 +10671,9 @@ "dev": true }, "psl": { - "version": "1.1.29", - "resolved": 
"https://registry.npmjs.org/psl/-/psl-1.1.29.tgz", - "integrity": "sha512-AeUmQ0oLN02flVHXWh9sSJF7mcdFq0ppid/JkErufc3hGIV/AMa8Fo9VgDo/cT2jFdOWoFvHp90qqBH54W+gjQ==", + "version": "1.1.31", + "resolved": "https://registry.npmjs.org/psl/-/psl-1.1.31.tgz", + "integrity": "sha512-/6pt4+C+T+wZUieKR620OpzN/LlnNKuWjy1iFLQ/UG35JqHlR/89MP1d96dUfkf6Dne3TuLQzOYEYshJ+Hx8mw==", "dev": true }, "public-encrypt": { @@ -11745,9 +10691,9 @@ } }, "pump": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", - "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/pump/-/pump-2.0.1.tgz", + "integrity": "sha512-ruPMNRkN3MHP1cWJc9OWr+T/xDP0jhXYCLfJcBuX54hhfIBnaQmAUMfDcG4DM5UMWByBbJY69QSphm3jtDKIkA==", "dev": true, "requires": { "end-of-stream": "^1.1.0", @@ -11763,24 +10709,12 @@ "duplexify": "^3.6.0", "inherits": "^2.0.3", "pump": "^2.0.0" - }, - "dependencies": { - "pump": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/pump/-/pump-2.0.1.tgz", - "integrity": "sha512-ruPMNRkN3MHP1cWJc9OWr+T/xDP0jhXYCLfJcBuX54hhfIBnaQmAUMfDcG4DM5UMWByBbJY69QSphm3jtDKIkA==", - "dev": true, - "requires": { - "end-of-stream": "^1.1.0", - "once": "^1.3.1" - } - } } }, "punycode": { - "version": "1.4.1", - "resolved": "https://registry.npmjs.org/punycode/-/punycode-1.4.1.tgz", - "integrity": "sha1-wNWmOycYgArY4esPpSachN1BhF4=", + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz", + "integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==", "dev": true }, "q": { @@ -11814,9 +10748,9 @@ "dev": true }, "randomatic": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/randomatic/-/randomatic-3.1.0.tgz", - "integrity": "sha512-KnGPVE0lo2WoXxIZ7cPR8YBpiol4gsSuOwDSg410oHh80ZMp5EiypNqL2K4Z77vJn6lB5rap7IkAmcUlalcnBQ==", + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/randomatic/-/randomatic-3.1.1.tgz", + "integrity": "sha512-TuDE5KxZ0J461RVjrJZCJc+J+zCkTb1MbH9AQUq68sMhOMcy9jLcb3BrZKgp9q9Ncltdg4QVqWrH02W2EFFVYw==", "dev": true, "requires": { "is-number": "^4.0.0", @@ -11829,12 +10763,6 @@ "resolved": "https://registry.npmjs.org/is-number/-/is-number-4.0.0.tgz", "integrity": "sha512-rSklcAIlf1OmFdyAqbnWTLVelsQ58uvZ66S/ZyawjWqIviTWCjg2PzVGw8WUA+nNuPTqb4wgA+NszrJ+08LlgQ==", "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true } } }, @@ -11920,32 +10848,11 @@ "requires": { "find-up": "^1.0.0", "read-pkg": "^1.0.0" - }, - "dependencies": { - "find-up": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/find-up/-/find-up-1.1.2.tgz", - "integrity": "sha1-ay6YIrGizgpgq2TWEOzK1TyyTQ8=", - "dev": true, - "requires": { - "path-exists": "^2.0.0", - "pinkie-promise": "^2.0.0" - } - }, - "path-exists": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-2.1.0.tgz", - "integrity": "sha1-D+tsZPD8UY2adU3V77YscCJ2H0s=", - "dev": true, - "requires": { - "pinkie-promise": "^2.0.0" - } - } } }, "readable-stream": { "version": "2.3.6", - "resolved": "http://registry.npmjs.org/readable-stream/-/readable-stream-2.3.6.tgz", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.6.tgz", 
"integrity": "sha512-tQtKA9WIAhBF3+VLAseyMqZeBjW0AHJoxOtYqSUZNJxauErmLbVm2FW1y+J/YA9dUrAC39ITejlZWhVIwawkKw==", "dev": true, "requires": { @@ -11956,6 +10863,14 @@ "safe-buffer": "~5.1.1", "string_decoder": "~1.1.1", "util-deprecate": "~1.0.1" + }, + "dependencies": { + "process-nextick-args": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.0.tgz", + "integrity": "sha512-MtEC1TqN0EU5nephaJ4rAtThHtC86dNN9qCuEhtshvpVBkAW5ZO7BASN9REnF9eoXGcRub+pFuKEpOHE+HbEMw==", + "dev": true + } } }, "readdir-scoped-modules": { @@ -11979,282 +10894,6 @@ "graceful-fs": "^4.1.11", "micromatch": "^3.1.10", "readable-stream": "^2.0.2" - }, - "dependencies": { - "arr-diff": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz", - "integrity": "sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=", - "dev": true - }, - "array-unique": { - "version": "0.3.2", - "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.3.2.tgz", - "integrity": "sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg=", - "dev": true - }, - "braces": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-2.3.2.tgz", - "integrity": "sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w==", - "dev": true, - "requires": { - "arr-flatten": "^1.1.0", - "array-unique": "^0.3.2", - "extend-shallow": "^2.0.1", - "fill-range": "^4.0.0", - "isobject": "^3.0.1", - "repeat-element": "^1.1.2", - "snapdragon": "^0.8.1", - "snapdragon-node": "^2.0.1", - "split-string": "^3.0.2", - "to-regex": "^3.0.1" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "expand-brackets": { - "version": "2.1.4", - "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-2.1.4.tgz", - "integrity": "sha1-t3c14xXOMPa27/D4OwQVGiJEliI=", - "dev": true, - "requires": { - "debug": "^2.3.3", - "define-property": "^0.2.5", - "extend-shallow": "^2.0.1", - "posix-character-classes": "^0.1.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "0.2.5", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-0.2.5.tgz", - "integrity": "sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=", - "dev": true, - "requires": { - "is-descriptor": "^0.1.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - }, - "is-accessor-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-0.1.6.tgz", - "integrity": "sha1-qeEss66Nh2cn7u84Q/igiXtcmNY=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-data-descriptor": { - "version": "0.1.4", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz", - "integrity": "sha1-C17mSDiOLIYCgueT8YVv7D8wG1Y=", - 
"dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-0.1.6.tgz", - "integrity": "sha512-avDYr0SB3DwO9zsMov0gKCESFYqCnE4hq/4z3TdUlukEy5t9C0YRq7HLrsN52NAcqXKaepeCD0n+B0arnVG3Hg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^0.1.6", - "is-data-descriptor": "^0.1.4", - "kind-of": "^5.0.0" - } - }, - "kind-of": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-5.1.0.tgz", - "integrity": "sha512-NGEErnH6F2vUuXDh+OlbcKW7/wOcfdRHaZ7VWtqCztfHri/++YKmP51OdWeGPuqCOba6kk2OTe5d02VmTB80Pw==", - "dev": true - } - } - }, - "extglob": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/extglob/-/extglob-2.0.4.tgz", - "integrity": "sha512-Nmb6QXkELsuBr24CJSkilo6UHHgbekK5UiZgfE6UHD3Eb27YC6oD+bhcT+tJ6cl8dmsgdQxnWlcry8ksBIBLpw==", - "dev": true, - "requires": { - "array-unique": "^0.3.2", - "define-property": "^1.0.0", - "expand-brackets": "^2.1.4", - "extend-shallow": "^2.0.1", - "fragment-cache": "^0.2.1", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-1.0.0.tgz", - "integrity": "sha1-dp66rz9KY6rTr56NMEybvnm/sOY=", - "dev": true, - "requires": { - "is-descriptor": "^1.0.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "fill-range": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-4.0.0.tgz", - "integrity": "sha1-1USBHUKPmOsGpj3EAtJAPDKMOPc=", - "dev": true, - "requires": { - "extend-shallow": "^2.0.1", - "is-number": "^3.0.0", - "repeat-string": "^1.6.1", - "to-regex-range": "^2.1.0" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "is-accessor-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-1.0.0.tgz", - "integrity": "sha512-m5hnHTkcVsPfqx3AKlyttIPb7J+XykHvJP2B9bZDjlhLIoEq4XoK64Vg7boZlVWYK6LUY94dYPEE7Lh0ZkZKcQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } - }, - "is-data-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-1.0.0.tgz", - "integrity": "sha512-jbRXy1FmtAoCjQkVmIVYwuuqDFUbaOeDjmed1tOGPrsMhtJA4rD9tkgA0F1qJ3gRFRXcHYVkdeaP50Q5rE/jLQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } - }, - "is-descriptor": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-1.0.2.tgz", - "integrity": "sha512-2eis5WqQGV7peooDyLmNEPUrps9+SXX5c9pL3xEB+4e9HnGuDa7mB7kHxHw4CbqS9k1T2hOH3miL8n8WtiYVtg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^1.0.0", - "is-data-descriptor": "^1.0.0", - "kind-of": 
"^6.0.2" - } - }, - "is-number": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-3.0.0.tgz", - "integrity": "sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true - }, - "micromatch": { - "version": "3.1.10", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-3.1.10.tgz", - "integrity": "sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg==", - "dev": true, - "requires": { - "arr-diff": "^4.0.0", - "array-unique": "^0.3.2", - "braces": "^2.3.1", - "define-property": "^2.0.2", - "extend-shallow": "^3.0.2", - "extglob": "^2.0.4", - "fragment-cache": "^0.2.1", - "kind-of": "^6.0.2", - "nanomatch": "^1.2.9", - "object.pick": "^1.3.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.2" - } - } } }, "realpath-native": { @@ -12351,8 +10990,7 @@ "repeat-string": { "version": "1.6.1", "resolved": "https://registry.npmjs.org/repeat-string/-/repeat-string-1.6.1.tgz", - "integrity": "sha1-jcrkcOHIirwtYA//Sndihtp15jc=", - "dev": true + "integrity": "sha1-jcrkcOHIirwtYA//Sndihtp15jc=" }, "repeating": { "version": "2.0.1", @@ -12441,12 +11079,12 @@ "dev": true }, "resolve": { - "version": "1.8.1", - "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.8.1.tgz", - "integrity": "sha512-AicPrAC7Qu1JxPCZ9ZgCZlY35QgFnNqc+0LtbRNxnVw4TXvjQ72wnuL9JQcEBgXkI9JM8MsT9kaQoHcpCRJOYA==", + "version": "1.10.0", + "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.10.0.tgz", + "integrity": "sha512-3sUr9aq5OfSg2S9pNtPA9hL1FVEAjvfOC4leW0SNf/mpnaakz2a9femSd6LqAww2RaFctwyf1lCqnTHuF1rxDg==", "dev": true, "requires": { - "path-parse": "^1.0.5" + "path-parse": "^1.0.6" } }, "resolve-cwd": { @@ -12512,12 +11150,12 @@ "dev": true }, "rimraf": { - "version": "2.6.2", - "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-2.6.2.tgz", - "integrity": "sha512-lreewLK/BlghmxtfH36YYVg1i8IAce4TI7oao75I1g245+6BctqTVQiBP3YUJ9C6DQOXJmkYR9X9fCLtCOJc5w==", + "version": "2.6.3", + "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-2.6.3.tgz", + "integrity": "sha512-mwqeW5XsA2qAejG46gYdENaxXjx9onRNCfn7L0duuP4hCuTIi/QO7PDK07KJfp1d+izWPrzEJDcSqBa0OZQriA==", "dev": true, "requires": { - "glob": "^7.0.5" + "glob": "^7.1.3" } }, "ripemd160": { @@ -12600,14 +11238,6 @@ "dev": true, "requires": { "symbol-observable": "1.0.1" - }, - "dependencies": { - "symbol-observable": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/symbol-observable/-/symbol-observable-1.0.1.tgz", - "integrity": "sha1-g0D8RwLDEi310iKI+IKD9RPT/dQ=", - "dev": true - } } }, "safe-buffer": { @@ -12646,292 +11276,6 @@ "minimist": "^1.1.1", "walker": "~1.0.5", "watch": "~0.18.0" - }, - "dependencies": { - "anymatch": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-2.0.0.tgz", - "integrity": 
"sha512-5teOsQWABXHHBFP9y3skS5P3d/WfWXpv3FUpy+LorMrNYaT9pI4oLMQX7jzQ2KklNpGpWHzdCXTDT2Y3XGlZBw==", - "dev": true, - "requires": { - "micromatch": "^3.1.4", - "normalize-path": "^2.1.1" - } - }, - "arr-diff": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz", - "integrity": "sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=", - "dev": true - }, - "array-unique": { - "version": "0.3.2", - "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.3.2.tgz", - "integrity": "sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg=", - "dev": true - }, - "braces": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-2.3.2.tgz", - "integrity": "sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w==", - "dev": true, - "requires": { - "arr-flatten": "^1.1.0", - "array-unique": "^0.3.2", - "extend-shallow": "^2.0.1", - "fill-range": "^4.0.0", - "isobject": "^3.0.1", - "repeat-element": "^1.1.2", - "snapdragon": "^0.8.1", - "snapdragon-node": "^2.0.1", - "split-string": "^3.0.2", - "to-regex": "^3.0.1" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "expand-brackets": { - "version": "2.1.4", - "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-2.1.4.tgz", - "integrity": "sha1-t3c14xXOMPa27/D4OwQVGiJEliI=", - "dev": true, - "requires": { - "debug": "^2.3.3", - "define-property": "^0.2.5", - "extend-shallow": "^2.0.1", - "posix-character-classes": "^0.1.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "0.2.5", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-0.2.5.tgz", - "integrity": "sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=", - "dev": true, - "requires": { - "is-descriptor": "^0.1.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - }, - "is-accessor-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-0.1.6.tgz", - "integrity": "sha1-qeEss66Nh2cn7u84Q/igiXtcmNY=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-data-descriptor": { - "version": "0.1.4", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz", - "integrity": "sha1-C17mSDiOLIYCgueT8YVv7D8wG1Y=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-0.1.6.tgz", - "integrity": 
"sha512-avDYr0SB3DwO9zsMov0gKCESFYqCnE4hq/4z3TdUlukEy5t9C0YRq7HLrsN52NAcqXKaepeCD0n+B0arnVG3Hg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^0.1.6", - "is-data-descriptor": "^0.1.4", - "kind-of": "^5.0.0" - } - }, - "kind-of": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-5.1.0.tgz", - "integrity": "sha512-NGEErnH6F2vUuXDh+OlbcKW7/wOcfdRHaZ7VWtqCztfHri/++YKmP51OdWeGPuqCOba6kk2OTe5d02VmTB80Pw==", - "dev": true - } - } - }, - "extglob": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/extglob/-/extglob-2.0.4.tgz", - "integrity": "sha512-Nmb6QXkELsuBr24CJSkilo6UHHgbekK5UiZgfE6UHD3Eb27YC6oD+bhcT+tJ6cl8dmsgdQxnWlcry8ksBIBLpw==", - "dev": true, - "requires": { - "array-unique": "^0.3.2", - "define-property": "^1.0.0", - "expand-brackets": "^2.1.4", - "extend-shallow": "^2.0.1", - "fragment-cache": "^0.2.1", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-1.0.0.tgz", - "integrity": "sha1-dp66rz9KY6rTr56NMEybvnm/sOY=", - "dev": true, - "requires": { - "is-descriptor": "^1.0.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "fill-range": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-4.0.0.tgz", - "integrity": "sha1-1USBHUKPmOsGpj3EAtJAPDKMOPc=", - "dev": true, - "requires": { - "extend-shallow": "^2.0.1", - "is-number": "^3.0.0", - "repeat-string": "^1.6.1", - "to-regex-range": "^2.1.0" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "is-accessor-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-1.0.0.tgz", - "integrity": "sha512-m5hnHTkcVsPfqx3AKlyttIPb7J+XykHvJP2B9bZDjlhLIoEq4XoK64Vg7boZlVWYK6LUY94dYPEE7Lh0ZkZKcQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } - }, - "is-data-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-1.0.0.tgz", - "integrity": "sha512-jbRXy1FmtAoCjQkVmIVYwuuqDFUbaOeDjmed1tOGPrsMhtJA4rD9tkgA0F1qJ3gRFRXcHYVkdeaP50Q5rE/jLQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } - }, - "is-descriptor": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-1.0.2.tgz", - "integrity": "sha512-2eis5WqQGV7peooDyLmNEPUrps9+SXX5c9pL3xEB+4e9HnGuDa7mB7kHxHw4CbqS9k1T2hOH3miL8n8WtiYVtg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^1.0.0", - "is-data-descriptor": "^1.0.0", - "kind-of": "^6.0.2" - } - }, - "is-number": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-3.0.0.tgz", - "integrity": "sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { 
- "is-buffer": "^1.1.5" - } - } - } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true - }, - "micromatch": { - "version": "3.1.10", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-3.1.10.tgz", - "integrity": "sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg==", - "dev": true, - "requires": { - "arr-diff": "^4.0.0", - "array-unique": "^0.3.2", - "braces": "^2.3.1", - "define-property": "^2.0.2", - "extend-shallow": "^3.0.2", - "extglob": "^2.0.4", - "fragment-cache": "^0.2.1", - "kind-of": "^6.0.2", - "nanomatch": "^1.2.9", - "object.pick": "^1.3.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.2" - } - } } }, "sax": { @@ -12949,32 +11293,6 @@ "ajv": "^6.1.0", "ajv-errors": "^1.0.0", "ajv-keywords": "^3.1.0" - }, - "dependencies": { - "ajv": { - "version": "6.5.4", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.5.4.tgz", - "integrity": "sha512-4Wyjt8+t6YszqaXnLDfMmG/8AlO5Zbcsy3ATHncCzjW/NoPzAId8AK6749Ybjmdt+kUY1gP60fCu46oDxPv/mg==", - "dev": true, - "requires": { - "fast-deep-equal": "^2.0.1", - "fast-json-stable-stringify": "^2.0.0", - "json-schema-traverse": "^0.4.1", - "uri-js": "^4.2.2" - } - }, - "fast-deep-equal": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-2.0.1.tgz", - "integrity": "sha1-ewUhjd+WZ79/Nwv3/bLLFf3Qqkk=", - "dev": true - }, - "json-schema-traverse": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", - "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", - "dev": true - } } }, "semver": { @@ -12983,12 +11301,6 @@ "integrity": "sha512-RS9R6R35NYgQn++fkDWaOmqGoj4Ek9gGs+DPxNUZKuwE183xjJroKvyo1IzVFeXvUrvmALy6FWD5xrdJT25gMg==", "dev": true }, - "semver-compare": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/semver-compare/-/semver-compare-1.0.0.tgz", - "integrity": "sha1-De4hahyUGrN+nvsXiPavxf9VN/w=", - "dev": true - }, "semver-greatest-satisfied-range": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/semver-greatest-satisfied-range/-/semver-greatest-satisfied-range-1.1.0.tgz", @@ -12999,9 +11311,9 @@ } }, "serialize-javascript": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-1.5.0.tgz", - "integrity": "sha512-Ga8c8NjAAp46Br4+0oZ2WxJCwIzwP60Gq1YPgU+39PiTVxyed/iKE/zyZI6+UlVYH5Q4PaQdHhcegIFPZTUfoQ==", + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-1.6.1.tgz", + "integrity": "sha512-A5MOagrPFga4YaKQSWHryl7AXvbQkEqpw4NNYMTNYUNV51bA8ABHgYFpqKx+YFFrw59xMV1qGH1R4AgoNIVgCw==", "dev": true }, "set-blocking": { @@ -13041,7 +11353,7 @@ }, "sha.js": { "version": "2.4.11", - "resolved": "http://registry.npmjs.org/sha.js/-/sha.js-2.4.11.tgz", + "resolved": "https://registry.npmjs.org/sha.js/-/sha.js-2.4.11.tgz", "integrity": "sha512-QMEp5B7cftE7APOjk5Y6xgrbWu+WkLVQwk8JNjZ8nKRciZaByEW6MubieAiToS7+dwvrjGhH8jRXz3MVd0AYqQ==", "dev": true, "requires": { @@ -13077,9 +11389,9 @@ } }, "shelljs": { 
- "version": "0.8.2", - "resolved": "https://registry.npmjs.org/shelljs/-/shelljs-0.8.2.tgz", - "integrity": "sha512-pRXeNrCA2Wd9itwhvLp5LZQvPJ0wU6bcjaTMywHHGX5XWhVN2nzSu7WV0q+oUY7mGK3mgSkDDzP3MgjqdyIgbQ==", + "version": "0.8.3", + "resolved": "https://registry.npmjs.org/shelljs/-/shelljs-0.8.3.tgz", + "integrity": "sha512-fc0BKlAWiLpwZljmOvAOTE/gXawtCoNrP5oaY7KIaQbbyHeQVg01pSEuEGvGh3HEdBU4baCD7wQBwADmM/7f7A==", "dev": true, "requires": { "glob": "^7.0.0", @@ -13122,12 +11434,6 @@ "integrity": "sha1-xB8vbDn8FtHNF61LXYlhFK5HDVU=", "dev": true }, - "slice-ansi": { - "version": "0.0.4", - "resolved": "http://registry.npmjs.org/slice-ansi/-/slice-ansi-0.0.4.tgz", - "integrity": "sha1-7b+JA/ZvfOL46v1s7tZeJkyDGzU=", - "dev": true - }, "slide": { "version": "1.1.6", "resolved": "https://registry.npmjs.org/slide/-/slide-1.1.6.tgz", @@ -13135,9 +11441,9 @@ "dev": true }, "smart-buffer": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/smart-buffer/-/smart-buffer-4.0.1.tgz", - "integrity": "sha512-RFqinRVJVcCAL9Uh1oVqE6FZkqsyLiVOYEZ20TqIOjuX7iFVJ+zsbs4RIghnw/pTs7mZvt8ZHhvm1ZUrR4fykg==", + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/smart-buffer/-/smart-buffer-4.0.2.tgz", + "integrity": "sha512-JDhEpTKzXusOqXZ0BUIdH+CjFdO/CR3tLlf5CN34IypI+xMmXW1uB16OOY8z3cICbJlDAVJzNbwBhNO0wt9OAw==", "dev": true }, "snapdragon": { @@ -13224,18 +11530,6 @@ "is-data-descriptor": "^1.0.0", "kind-of": "^6.0.2" } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true } } }, @@ -13246,16 +11540,27 @@ "dev": true, "requires": { "kind-of": "^3.2.0" + }, + "dependencies": { + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + } } }, "socks": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/socks/-/socks-2.2.1.tgz", - "integrity": "sha512-0GabKw7n9mI46vcNrVfs0o6XzWzjVa3h6GaSo2UPxtWAROXUWavfJWh1M4PR5tnE0dcnQXZIDFP4yrAysLze/w==", + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/socks/-/socks-2.2.3.tgz", + "integrity": "sha512-+2r83WaRT3PXYoO/1z+RDEBE7Z2f9YcdQnJ0K/ncXXbV5gJ6wYfNAebYFYiiUjM6E4JyXnPY8cimwyvFYHVUUA==", "dev": true, "requires": { "ip": "^1.1.5", - "smart-buffer": "^4.0.1" + "smart-buffer": "4.0.2" } }, "socks-proxy-agent": { @@ -13334,9 +11639,9 @@ "dev": true }, "spdx-correct": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/spdx-correct/-/spdx-correct-3.0.2.tgz", - "integrity": "sha512-q9hedtzyXHr5S0A1vEPoK/7l8NpfkFYTq6iCY+Pno2ZbdZR6WexZFtqeVGkGxW3TEJMN914Z55EnAGMmenlIQQ==", + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/spdx-correct/-/spdx-correct-3.1.0.tgz", + "integrity": "sha512-lr2EZCctC2BNR7j7WzJ2FpDznxky1sjfxvvYEyzxNyb6lZXHODmEoJeFu4JupYlkfha1KZpJyoqiJ7pgA1qq8Q==", "dev": true, "requires": { "spdx-expression-parse": "^3.0.0", @@ -13360,9 +11665,9 @@ } }, "spdx-license-ids": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/spdx-license-ids/-/spdx-license-ids-3.0.1.tgz", - "integrity": 
"sha512-TfOfPcYGBB5sDuPn3deByxPhmfegAhpDYKSOXZQN81Oyrrif8ZCodOLzK3AesELnCx03kikhyDwh0pfvvQvF8w==", + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/spdx-license-ids/-/spdx-license-ids-3.0.3.tgz", + "integrity": "sha512-uBIcIl3Ih6Phe3XHK1NqboJLdGfwr1UN3k6wSD1dZpmPsIkb8AGNbZYJ1fOBk834+Gxy8rpfDxrS6XLEMZMY2g==", "dev": true }, "split": { @@ -13399,9 +11704,9 @@ "dev": true }, "sshpk": { - "version": "1.15.1", - "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.15.1.tgz", - "integrity": "sha512-mSdgNUaidk+dRU5MhYtN9zebdzF2iG0cNPWy8HG+W8y+fT1JnSkh0fzzpjOa0L7P8i1Rscz38t0h4gPcKz43xA==", + "version": "1.16.1", + "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.16.1.tgz", + "integrity": "sha512-HXXqVUq7+pcKeLqqZj6mHFUMvXtOJt1uoUx09pFW6011inTMxqI8BA8PM95myrIyyKwdnzjdFjLiE6KBPVtJIg==", "dev": true, "requires": { "asn1": "~0.2.3", @@ -13431,15 +11736,9 @@ "dev": true }, "stack-utils": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/stack-utils/-/stack-utils-1.0.1.tgz", - "integrity": "sha1-1PM6tU6OOHeLDKXP07OvsS22hiA=", - "dev": true - }, - "staged-git-files": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/staged-git-files/-/staged-git-files-1.1.1.tgz", - "integrity": "sha512-H89UNKr1rQJvI1c/PIR3kiAMBV23yvR7LItZiV74HWZwzt7f3YHuujJ9nJZlt58WlFox7XQsOahexwk7nTe69A==", + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/stack-utils/-/stack-utils-1.0.2.tgz", + "integrity": "sha512-MTX+MeG5U994cazkjd/9KNAapsHnibjMLnfXodlkXw76JEea0UiNzrqidzo1emMwk7w5Qhc9jd4Bn9TBb1MFwA==", "dev": true }, "static-extend": { @@ -13470,25 +11769,15 @@ "dev": true }, "stream-browserify": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/stream-browserify/-/stream-browserify-2.0.1.tgz", - "integrity": "sha1-ZiZu5fm9uZQKTkUUyvtDu3Hlyds=", + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/stream-browserify/-/stream-browserify-2.0.2.tgz", + "integrity": "sha512-nX6hmklHs/gr2FuxYDltq8fJA1GDlxKQCz8O/IM4atRqBH8OORmBNgfvW5gG10GT/qQ9u0CzIvr2X5Pkt6ntqg==", "dev": true, "requires": { "inherits": "~2.0.1", "readable-stream": "^2.0.2" } }, - "stream-combiner": { - "version": "0.2.2", - "resolved": "http://registry.npmjs.org/stream-combiner/-/stream-combiner-0.2.2.tgz", - "integrity": "sha1-rsjLrBd7Vrb0+kec7YwZEs7lKFg=", - "dev": true, - "requires": { - "duplexer": "~0.1.1", - "through": "~2.3.4" - } - }, "stream-each": { "version": "1.2.3", "resolved": "https://registry.npmjs.org/stream-each/-/stream-each-1.2.3.tgz", @@ -13524,12 +11813,6 @@ "integrity": "sha1-1cdSgl5TZ+eG944Y5EXqIjoVWVI=", "dev": true }, - "string-argv": { - "version": "0.0.2", - "resolved": "https://registry.npmjs.org/string-argv/-/string-argv-0.0.2.tgz", - "integrity": "sha1-2sMECGkMIfPDYwo/86BYd73L1zY=", - "dev": true - }, "string-length": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/string-length/-/string-length-2.0.0.tgz", @@ -13588,20 +11871,15 @@ "safe-buffer": "~5.1.0" } }, - "stringify-object": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/stringify-object/-/stringify-object-3.3.0.tgz", - "integrity": "sha512-rHqiFh1elqCQ9WPLIC8I0Q/g/wj5J1eMkyoiD6eoQApWHP0FtlK7rqnhmabL5VUY9JQCcqwwvlOaSuutekgyrw==", - "dev": true, - "requires": { - "get-own-enumerable-property-symbols": "^3.0.0", - "is-obj": "^1.0.1", - "is-regexp": "^1.0.0" - } - }, + "stringify-package": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/stringify-package/-/stringify-package-1.0.0.tgz", + "integrity": 
"sha512-JIQqiWmLiEozOC0b0BtxZ/AOUtdUZHCBPgqIZ2kSJJqGwgb9neo44XdTHUC4HZSGqi03hOeB7W/E8rAlKnGe9g==", + "dev": true + }, "strip-ansi": { "version": "3.0.1", - "resolved": "http://registry.npmjs.org/strip-ansi/-/strip-ansi-3.0.1.tgz", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-3.0.1.tgz", "integrity": "sha1-ajhfuIU9lS1f8F0Oiq+UJ43GPc8=", "dev": true, "requires": { @@ -13636,26 +11914,16 @@ "dev": true }, "strong-log-transformer": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/strong-log-transformer/-/strong-log-transformer-2.0.0.tgz", - "integrity": "sha512-FQmNqAXJgOX8ygOcvPLlGWBNT41mvNJ9ALoYf0GTwVt9t30mGTqpmp/oJx5gLcu52DXK10kS7dVWhx8aPXDTlg==", + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/strong-log-transformer/-/strong-log-transformer-2.1.0.tgz", + "integrity": "sha512-B3Hgul+z0L9a236FAUC9iZsL+nVHgoCJnqCbN588DjYxvGXaXaaFbfmQ/JhvKjZwsOukuR72XbHv71Qkug0HxA==", "dev": true, "requires": { - "byline": "^5.0.0", "duplexer": "^0.1.1", "minimist": "^1.2.0", "through": "^2.3.4" } }, - "subarg": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/subarg/-/subarg-1.0.0.tgz", - "integrity": "sha1-9izxdYHplrSPyWVpn1TAauJouNI=", - "dev": true, - "requires": { - "minimist": "^1.1.0" - } - }, "supports-color": { "version": "5.5.0", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", @@ -13675,9 +11943,9 @@ } }, "symbol-observable": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/symbol-observable/-/symbol-observable-1.2.0.tgz", - "integrity": "sha512-e900nM8RRtGhlV36KGEU9k65K3mPb1WV70OdjfxlG2EAuM1noi/E/BaW/uMhL7bPEssK8QV57vN3esixjUvcXQ==", + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/symbol-observable/-/symbol-observable-1.0.1.tgz", + "integrity": "sha1-g0D8RwLDEi310iKI+IKD9RPT/dQ=", "dev": true }, "symbol-tree": { @@ -13699,9 +11967,9 @@ } }, "tapable": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/tapable/-/tapable-1.1.0.tgz", - "integrity": "sha512-IlqtmLVaZA2qab8epUXbVWRn3aB1imbDMJtjB3nu4X0NqPkcY/JH9ZtCBWKHWPxs8Svi9tyo8w2dBoi07qZbBA==", + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/tapable/-/tapable-1.1.1.tgz", + "integrity": "sha512-9I2ydhj8Z9veORCw5PRm4u9uebCn0mcCa6scWoNcbZ6dAtoo2618u9UUzxgmsCOreJpqDDuv61LvwofW7hLcBA==", "dev": true }, "tar": { @@ -13736,9 +12004,9 @@ } }, "terser": { - "version": "3.10.1", - "resolved": "https://registry.npmjs.org/terser/-/terser-3.10.1.tgz", - "integrity": "sha512-GE0ShECt1/dZUZt9Kyr/IC6xXG46pTbm1C1WfzQbbnRB5LhdJlF8p5NBZ38RjspD7hEM9O5ud8aIcOFY6evl4A==", + "version": "3.14.1", + "resolved": "https://registry.npmjs.org/terser/-/terser-3.14.1.tgz", + "integrity": "sha512-NSo3E99QDbYSMeJaEk9YW2lTg3qS9V0aKGlb+PlOrei1X02r1wSBHCNX/O+yeTRFSWPKPIGj6MqvvdqV4rnVGw==", "dev": true, "requires": { "commander": "~2.17.1", @@ -13753,9 +12021,9 @@ "dev": true }, "source-map-support": { - "version": "0.5.9", - "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.9.tgz", - "integrity": "sha512-gR6Rw4MvUlYy83vP0vxoVNzM6t8MUXqNuRsuBmBHQDu1Fh6X015FrLdgoDKcNdkwGubozq0P4N0Q37UyFVr1EA==", + "version": "0.5.10", + "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.10.tgz", + "integrity": "sha512-YfQ3tQFTK/yzlGJuX8pTwa4tifQj4QS2Mj7UegOu8jAz59MqIiMGPXxQhVQiIMNzayuUSF/jEuVnfFF5JqybmQ==", "dev": true, "requires": { "buffer-from": "^1.0.0", @@ -13765,9 +12033,9 @@ } }, "terser-webpack-plugin": { - "version": "1.1.0", - "resolved": 
"https://registry.npmjs.org/terser-webpack-plugin/-/terser-webpack-plugin-1.1.0.tgz", - "integrity": "sha512-61lV0DSxMAZ8AyZG7/A4a3UPlrbOBo8NIQ4tJzLPAdGOQ+yoNC7l5ijEow27lBAL2humer01KLS6bGIMYQxKoA==", + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/terser-webpack-plugin/-/terser-webpack-plugin-1.2.1.tgz", + "integrity": "sha512-GGSt+gbT0oKcMDmPx4SRSfJPE1XaN3kQRWG4ghxKQw9cn5G9x6aCKSsgYdvyM0na9NJ4Drv0RG6jbBByZ5CMjw==", "dev": true, "requires": { "cacache": "^11.0.2", @@ -13799,6 +12067,97 @@ "object-assign": "^4.1.0", "read-pkg-up": "^1.0.1", "require-main-filename": "^1.0.1" + }, + "dependencies": { + "arr-diff": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-2.0.0.tgz", + "integrity": "sha1-jzuCf5Vai9ZpaX5KQlasPOrjVs8=", + "dev": true, + "requires": { + "arr-flatten": "^1.0.1" + } + }, + "array-unique": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.2.1.tgz", + "integrity": "sha1-odl8yvy8JiXMcPrc6zalDFiwGlM=", + "dev": true + }, + "braces": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/braces/-/braces-1.8.5.tgz", + "integrity": "sha1-uneWLhLf+WnWt2cR6RS3N4V79qc=", + "dev": true, + "requires": { + "expand-range": "^1.8.1", + "preserve": "^0.2.0", + "repeat-element": "^1.1.2" + } + }, + "expand-brackets": { + "version": "0.1.5", + "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-0.1.5.tgz", + "integrity": "sha1-3wcoTjQqgHzXM6xa9yQR5YHRF3s=", + "dev": true, + "requires": { + "is-posix-bracket": "^0.1.0" + } + }, + "extglob": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/extglob/-/extglob-0.3.2.tgz", + "integrity": "sha1-Lhj/PS9JqydlzskCPwEdqo2DSaE=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + }, + "is-extglob": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-1.0.0.tgz", + "integrity": "sha1-rEaBd8SUNAWgkvyPKXYMb/xiBsA=", + "dev": true + }, + "is-glob": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-2.0.1.tgz", + "integrity": "sha1-0Jb5JqPe1WAPP9/ZEZjLCIjC2GM=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + }, + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + }, + "micromatch": { + "version": "2.3.11", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-2.3.11.tgz", + "integrity": "sha1-hmd8l9FyCzY0MdBNDRUpO9OMFWU=", + "dev": true, + "requires": { + "arr-diff": "^2.0.0", + "array-unique": "^0.2.1", + "braces": "^1.8.2", + "expand-brackets": "^0.1.4", + "extglob": "^0.3.1", + "filename-regex": "^2.0.0", + "is-extglob": "^1.0.0", + "is-glob": "^2.0.1", + "kind-of": "^3.0.2", + "normalize-path": "^2.0.1", + "object.omit": "^2.0.0", + "parse-glob": "^3.0.4", + "regex-cache": "^0.4.2" + } + } } }, "test-value": { @@ -13829,24 +12188,24 @@ }, "through": { "version": "2.3.8", - "resolved": "http://registry.npmjs.org/through/-/through-2.3.8.tgz", + "resolved": "https://registry.npmjs.org/through/-/through-2.3.8.tgz", "integrity": "sha1-DdTJ/6q8NXlgsbckEV1+Doai4fU=", "dev": true }, "through2": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/through2/-/through2-2.0.3.tgz", - "integrity": "sha1-AARWmzfHx0ujnEPzzteNGtlBQL4=", + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/through2/-/through2-2.0.5.tgz", + 
"integrity": "sha512-/mrRod8xqpA+IHSLyGCQ2s8SPHiCDEeQJSep1jqLYeEUClOFG2Qsh+4FU6G9VeqpZnGW/Su8LQGc4YKni5rYSQ==", "dev": true, "requires": { - "readable-stream": "^2.1.5", + "readable-stream": "~2.3.6", "xtend": "~4.0.1" } }, "through2-filter": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/through2-filter/-/through2-filter-2.0.0.tgz", - "integrity": "sha1-YLxVoNrLdghdsfna6Zq0P4PWIuw=", + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/through2-filter/-/through2-filter-3.0.0.tgz", + "integrity": "sha512-jaRjI2WxN3W1V8/FMZ9HKIBXixtiqs3SQSX4/YGIiP3gL6djW48VoZq9tDqeCWs3MT8YY5wb/zli8VW8snY1CA==", "dev": true, "requires": { "through2": "~2.0.0", @@ -13922,6 +12281,17 @@ "dev": true, "requires": { "kind-of": "^3.0.2" + }, + "dependencies": { + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + } } }, "to-regex": { @@ -13944,17 +12314,6 @@ "requires": { "is-number": "^3.0.0", "repeat-string": "^1.6.1" - }, - "dependencies": { - "is-number": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-3.0.0.tgz", - "integrity": "sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - } - } } }, "to-through": { @@ -13974,6 +12333,14 @@ "requires": { "psl": "^1.1.24", "punycode": "^1.4.1" + }, + "dependencies": { + "punycode": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-1.4.1.tgz", + "integrity": "sha1-wNWmOycYgArY4esPpSachN1BhF4=", + "dev": true + } } }, "tr46": { @@ -13983,14 +12350,6 @@ "dev": true, "requires": { "punycode": "^2.1.0" - }, - "dependencies": { - "punycode": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz", - "integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==", - "dev": true - } } }, "trash": { @@ -14039,7 +12398,7 @@ }, "jsonfile": { "version": "2.4.0", - "resolved": "http://registry.npmjs.org/jsonfile/-/jsonfile-2.4.0.tgz", + "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-2.4.0.tgz", "integrity": "sha1-NzaitCi4e72gzIO1P6PWM6NcKug=", "dev": true, "requires": { @@ -14067,268 +12426,61 @@ "dev": true }, "ts-jest": { - "version": "22.4.6", - "resolved": "https://registry.npmjs.org/ts-jest/-/ts-jest-22.4.6.tgz", - "integrity": "sha512-kYQ6g1G1AU+bOO9rv+SSQXg4WTcni6Wx3AM48iHni0nP1vIuhdNRjKTE9Cxx36Ix/IOV7L85iKu07dgXJzH2pQ==", + "version": "23.10.5", + "resolved": "https://registry.npmjs.org/ts-jest/-/ts-jest-23.10.5.tgz", + "integrity": "sha512-MRCs9qnGoyKgFc8adDEntAOP64fWK1vZKnOYU1o2HxaqjdJvGqmkLCPCnVq1/If4zkUmEjKPnCiUisTrlX2p2A==", "dev": true, "requires": { - "babel-core": "^6.26.3", - "babel-plugin-istanbul": "^4.1.6", - "babel-plugin-transform-es2015-modules-commonjs": "^6.26.2", - "babel-preset-jest": "^22.4.3", - "cpx": "^1.5.0", - "fs-extra": "6.0.0", - "jest-config": "^22.4.3", - "lodash": "^4.17.10", - "pkg-dir": "^2.0.0", - "source-map-support": "^0.5.5", - "yargs": "^11.0.0" + "bs-logger": "0.x", + "buffer-from": "1.x", + "fast-json-stable-stringify": "2.x", + "json5": "2.x", + "make-error": "1.x", + "mkdirp": "0.x", + "resolve": "1.x", + "semver": "^5.5", + "yargs-parser": "10.x" }, "dependencies": { - "ansi-regex": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-3.0.0.tgz", - "integrity": 
"sha1-7QMXwyIGT3lGbAKWa922Bas32Zg=", - "dev": true - }, - "babel-plugin-jest-hoist": { - "version": "22.4.4", - "resolved": "https://registry.npmjs.org/babel-plugin-jest-hoist/-/babel-plugin-jest-hoist-22.4.4.tgz", - "integrity": "sha512-DUvGfYaAIlkdnygVIEl0O4Av69NtuQWcrjMOv6DODPuhuGLDnbsARz3AwiiI/EkIMMlxQDUcrZ9yoyJvTNjcVQ==", - "dev": true - }, - "babel-preset-jest": { - "version": "22.4.4", - "resolved": "https://registry.npmjs.org/babel-preset-jest/-/babel-preset-jest-22.4.4.tgz", - "integrity": "sha512-+dxMtOFwnSYWfum0NaEc0O03oSdwBsjx4tMSChRDPGwu/4wSY6Q6ANW3wkjKpJzzguaovRs/DODcT4hbSN8yiA==", - "dev": true, - "requires": { - "babel-plugin-jest-hoist": "^22.4.4", - "babel-plugin-syntax-object-rest-spread": "^6.13.0" - } - }, - "cliui": { + "camelcase": { "version": "4.1.0", - "resolved": "https://registry.npmjs.org/cliui/-/cliui-4.1.0.tgz", - "integrity": "sha512-4FG+RSG9DL7uEwRUZXZn3SS34DiDPfzP0VOiEwtUWlE+AR2EIg+hSyvrIgUUfhdgR/UkAeW2QHgeP+hWrXs7jQ==", - "dev": true, - "requires": { - "string-width": "^2.1.1", - "strip-ansi": "^4.0.0", - "wrap-ansi": "^2.0.0" - } - }, - "expect": { - "version": "22.4.3", - "resolved": "http://registry.npmjs.org/expect/-/expect-22.4.3.tgz", - "integrity": "sha512-XcNXEPehqn8b/jm8FYotdX0YrXn36qp4HWlrVT4ktwQas1l1LPxiVWncYnnL2eyMtKAmVIaG0XAp0QlrqJaxaA==", - "dev": true, - "requires": { - "ansi-styles": "^3.2.0", - "jest-diff": "^22.4.3", - "jest-get-type": "^22.4.3", - "jest-matcher-utils": "^22.4.3", - "jest-message-util": "^22.4.3", - "jest-regex-util": "^22.4.3" - } - }, - "fs-extra": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-6.0.0.tgz", - "integrity": "sha512-lk2cUCo8QzbiEWEbt7Cw3m27WMiRG321xsssbcIpfMhpRjrlC08WBOVQqj1/nQYYNnPtyIhP1oqLO3QwT2tPCw==", - "dev": true, - "requires": { - "graceful-fs": "^4.1.2", - "jsonfile": "^4.0.0", - "universalify": "^0.1.0" - } - }, - "is-fullwidth-code-point": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-2.0.0.tgz", - "integrity": "sha1-o7MKXE8ZkYMWeqq5O+764937ZU8=", - "dev": true - }, - "jest-config": { - "version": "22.4.4", - "resolved": "https://registry.npmjs.org/jest-config/-/jest-config-22.4.4.tgz", - "integrity": "sha512-9CKfo1GC4zrXSoMLcNeDvQBfgtqGTB1uP8iDIZ97oB26RCUb886KkKWhVcpyxVDOUxbhN+uzcBCeFe7w+Iem4A==", - "dev": true, - "requires": { - "chalk": "^2.0.1", - "glob": "^7.1.1", - "jest-environment-jsdom": "^22.4.1", - "jest-environment-node": "^22.4.1", - "jest-get-type": "^22.1.0", - "jest-jasmine2": "^22.4.4", - "jest-regex-util": "^22.1.0", - "jest-resolve": "^22.4.2", - "jest-util": "^22.4.1", - "jest-validate": "^22.4.4", - "pretty-format": "^22.4.0" - } - }, - "jest-diff": { - "version": "22.4.3", - "resolved": "http://registry.npmjs.org/jest-diff/-/jest-diff-22.4.3.tgz", - "integrity": "sha512-/QqGvCDP5oZOF6PebDuLwrB2BMD8ffJv6TAGAdEVuDx1+uEgrHpSFrfrOiMRx2eJ1hgNjlQrOQEHetVwij90KA==", - "dev": true, - "requires": { - "chalk": "^2.0.1", - "diff": "^3.2.0", - "jest-get-type": "^22.4.3", - "pretty-format": "^22.4.3" - } - }, - "jest-environment-jsdom": { - "version": "22.4.3", - "resolved": "https://registry.npmjs.org/jest-environment-jsdom/-/jest-environment-jsdom-22.4.3.tgz", - "integrity": "sha512-FviwfR+VyT3Datf13+ULjIMO5CSeajlayhhYQwpzgunswoaLIPutdbrnfUHEMyJCwvqQFaVtTmn9+Y8WCt6n1w==", - "dev": true, - "requires": { - "jest-mock": "^22.4.3", - "jest-util": "^22.4.3", - "jsdom": "^11.5.1" - } - }, - "jest-environment-node": { - "version": "22.4.3", - "resolved": 
"https://registry.npmjs.org/jest-environment-node/-/jest-environment-node-22.4.3.tgz", - "integrity": "sha512-reZl8XF6t/lMEuPWwo9OLfttyC26A5AMgDyEQ6DBgZuyfyeNUzYT8BFo6uxCCP/Av/b7eb9fTi3sIHFPBzmlRA==", - "dev": true, - "requires": { - "jest-mock": "^22.4.3", - "jest-util": "^22.4.3" - } - }, - "jest-jasmine2": { - "version": "22.4.4", - "resolved": "https://registry.npmjs.org/jest-jasmine2/-/jest-jasmine2-22.4.4.tgz", - "integrity": "sha512-nK3vdUl50MuH7vj/8at7EQVjPGWCi3d5+6aCi7Gxy/XMWdOdbH1qtO/LjKbqD8+8dUAEH+BVVh7HkjpCWC1CSw==", - "dev": true, - "requires": { - "chalk": "^2.0.1", - "co": "^4.6.0", - "expect": "^22.4.0", - "graceful-fs": "^4.1.11", - "is-generator-fn": "^1.0.0", - "jest-diff": "^22.4.0", - "jest-matcher-utils": "^22.4.0", - "jest-message-util": "^22.4.0", - "jest-snapshot": "^22.4.0", - "jest-util": "^22.4.1", - "source-map-support": "^0.5.0" - } - }, - "jest-matcher-utils": { - "version": "22.4.3", - "resolved": "http://registry.npmjs.org/jest-matcher-utils/-/jest-matcher-utils-22.4.3.tgz", - "integrity": "sha512-lsEHVaTnKzdAPR5t4B6OcxXo9Vy4K+kRRbG5gtddY8lBEC+Mlpvm1CJcsMESRjzUhzkz568exMV1hTB76nAKbA==", - "dev": true, - "requires": { - "chalk": "^2.0.1", - "jest-get-type": "^22.4.3", - "pretty-format": "^22.4.3" - } - }, - "jest-message-util": { - "version": "22.4.3", - "resolved": "https://registry.npmjs.org/jest-message-util/-/jest-message-util-22.4.3.tgz", - "integrity": "sha512-iAMeKxhB3Se5xkSjU0NndLLCHtP4n+GtCqV0bISKA5dmOXQfEbdEmYiu2qpnWBDCQdEafNDDU6Q+l6oBMd/+BA==", - "dev": true, - "requires": { - "@babel/code-frame": "^7.0.0-beta.35", - "chalk": "^2.0.1", - "micromatch": "^2.3.11", - "slash": "^1.0.0", - "stack-utils": "^1.0.1" - } - }, - "jest-mock": { - "version": "22.4.3", - "resolved": "https://registry.npmjs.org/jest-mock/-/jest-mock-22.4.3.tgz", - "integrity": "sha512-+4R6mH5M1G4NK16CKg9N1DtCaFmuxhcIqF4lQK/Q1CIotqMs/XBemfpDPeVZBFow6iyUNu6EBT9ugdNOTT5o5Q==", - "dev": true - }, - "jest-regex-util": { - "version": "22.4.3", - "resolved": "https://registry.npmjs.org/jest-regex-util/-/jest-regex-util-22.4.3.tgz", - "integrity": "sha512-LFg1gWr3QinIjb8j833bq7jtQopiwdAs67OGfkPrvy7uNUbVMfTXXcOKXJaeY5GgjobELkKvKENqq1xrUectWg==", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-4.1.0.tgz", + "integrity": "sha1-1UVjW+HjPFQmScaRc+Xeas+uNN0=", "dev": true }, - "jest-resolve": { - "version": "22.4.3", - "resolved": "http://registry.npmjs.org/jest-resolve/-/jest-resolve-22.4.3.tgz", - "integrity": "sha512-u3BkD/MQBmwrOJDzDIaxpyqTxYH+XqAXzVJP51gt29H8jpj3QgKof5GGO2uPGKGeA1yTMlpbMs1gIQ6U4vcRhw==", - "dev": true, - "requires": { - "browser-resolve": "^1.11.2", - "chalk": "^2.0.1" - } - }, - "jest-snapshot": { - "version": "22.4.3", - "resolved": "http://registry.npmjs.org/jest-snapshot/-/jest-snapshot-22.4.3.tgz", - "integrity": "sha512-JXA0gVs5YL0HtLDCGa9YxcmmV2LZbwJ+0MfyXBBc5qpgkEYITQFJP7XNhcHFbUvRiniRpRbGVfJrOoYhhGE0RQ==", - "dev": true, - "requires": { - "chalk": "^2.0.1", - "jest-diff": "^22.4.3", - "jest-matcher-utils": "^22.4.3", - "mkdirp": "^0.5.1", - "natural-compare": "^1.4.0", - "pretty-format": "^22.4.3" - } - }, - "jest-util": { - "version": "22.4.3", - "resolved": "https://registry.npmjs.org/jest-util/-/jest-util-22.4.3.tgz", - "integrity": "sha512-rfDfG8wyC5pDPNdcnAlZgwKnzHvZDu8Td2NJI/jAGKEGxJPYiE4F0ss/gSAkG4778Y23Hvbz+0GMrDJTeo7RjQ==", - "dev": true, - "requires": { - "callsites": "^2.0.0", - "chalk": "^2.0.1", - "graceful-fs": "^4.1.11", - "is-ci": "^1.0.10", - "jest-message-util": "^22.4.3", - "mkdirp": "^0.5.1", - "source-map": "^0.6.0" - } - }, 
- "jest-validate": { - "version": "22.4.4", - "resolved": "https://registry.npmjs.org/jest-validate/-/jest-validate-22.4.4.tgz", - "integrity": "sha512-dmlf4CIZRGvkaVg3fa0uetepcua44DHtktHm6rcoNVtYlpwe6fEJRkMFsaUVcFHLzbuBJ2cPw9Gl9TKfnzMVwg==", - "dev": true, - "requires": { - "chalk": "^2.0.1", - "jest-config": "^22.4.4", - "jest-get-type": "^22.1.0", - "leven": "^2.1.0", - "pretty-format": "^22.4.0" - } - }, - "os-locale": { + "json5": { "version": "2.1.0", - "resolved": "https://registry.npmjs.org/os-locale/-/os-locale-2.1.0.tgz", - "integrity": "sha512-3sslG3zJbEYcaC4YVAvDorjGxc7tv6KVATnLPZONiljsUncvihe9BQoVCEs0RZ1kmf4Hk9OBqlZfJZWI4GanKA==", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.1.0.tgz", + "integrity": "sha512-8Mh9h6xViijj36g7Dxi+Y4S6hNGV96vcJZr/SrlHh1LR/pEn/8j/+qIBbs44YKl69Lrfctp4QD+AdWLTMqEZAQ==", "dev": true, "requires": { - "execa": "^0.7.0", - "lcid": "^1.0.0", - "mem": "^1.1.0" + "minimist": "^1.2.0" } }, - "pretty-format": { - "version": "22.4.3", - "resolved": "http://registry.npmjs.org/pretty-format/-/pretty-format-22.4.3.tgz", - "integrity": "sha512-S4oT9/sT6MN7/3COoOy+ZJeA92VmOnveLHgrwBE3Z1W5N9S2A1QGNYiE1z75DAENbJrXXUb+OWXhpJcg05QKQQ==", + "yargs-parser": { + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-10.1.0.tgz", + "integrity": "sha512-VCIyR1wJoEBZUqk5PA+oOBF6ypbwh5aNB3I50guxAL/quggdfs4TtNHQrSazFA3fYZ+tEqfs0zIGlv0c/rgjbQ==", "dev": true, "requires": { - "ansi-regex": "^3.0.0", - "ansi-styles": "^3.2.0" + "camelcase": "^4.1.0" } - }, + } + } + }, + "ts-node": { + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-8.0.2.tgz", + "integrity": "sha512-MosTrinKmaAcWgO8tqMjMJB22h+sp3Rd1i4fdoWY4mhBDekOwIAKI/bzmRi7IcbCmjquccYg2gcF6NBkLgr0Tw==", + "dev": true, + "requires": { + "arg": "^4.1.0", + "diff": "^3.1.0", + "make-error": "^1.1.1", + "source-map-support": "^0.5.6", + "yn": "^3.0.0" + }, + "dependencies": { "source-map": { "version": "0.6.1", "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", @@ -14336,82 +12488,9 @@ "dev": true }, "source-map-support": { - "version": "0.5.9", - "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.9.tgz", - "integrity": "sha512-gR6Rw4MvUlYy83vP0vxoVNzM6t8MUXqNuRsuBmBHQDu1Fh6X015FrLdgoDKcNdkwGubozq0P4N0Q37UyFVr1EA==", - "dev": true, - "requires": { - "buffer-from": "^1.0.0", - "source-map": "^0.6.0" - } - }, - "string-width": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-2.1.1.tgz", - "integrity": "sha512-nOqH59deCq9SRHlxq1Aw85Jnt4w6KvLKqWVik6oA9ZklXLNIOlqg4F2yrT1MVaTjAqvVwdfeZ7w7aCvJD7ugkw==", - "dev": true, - "requires": { - "is-fullwidth-code-point": "^2.0.0", - "strip-ansi": "^4.0.0" - } - }, - "strip-ansi": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-4.0.0.tgz", - "integrity": "sha1-qEeQIusaw2iocTibY1JixQXuNo8=", - "dev": true, - "requires": { - "ansi-regex": "^3.0.0" - } - }, - "yargs": { - "version": "11.1.0", - "resolved": "http://registry.npmjs.org/yargs/-/yargs-11.1.0.tgz", - "integrity": "sha512-NwW69J42EsCSanF8kyn5upxvjp5ds+t3+udGBeTbFnERA+lF541DDpMawzo4z6W/QrzNM18D+BPMiOBibnFV5A==", - "dev": true, - "requires": { - "cliui": "^4.0.0", - "decamelize": "^1.1.1", - "find-up": "^2.1.0", - "get-caller-file": "^1.0.1", - "os-locale": "^2.0.0", - "require-directory": "^2.1.1", - "require-main-filename": "^1.0.1", - "set-blocking": "^2.0.0", - "string-width": "^2.0.0", - "which-module": "^2.0.0", - 
"y18n": "^3.2.1", - "yargs-parser": "^9.0.2" - } - } - } - }, - "ts-node": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-7.0.1.tgz", - "integrity": "sha512-BVwVbPJRspzNh2yfslyT1PSbl5uIk03EZlb493RKHN4qej/D06n1cEhjlOJG69oFsE7OT8XjpTUcYf6pKTLMhw==", - "dev": true, - "requires": { - "arrify": "^1.0.0", - "buffer-from": "^1.1.0", - "diff": "^3.1.0", - "make-error": "^1.1.1", - "minimist": "^1.2.0", - "mkdirp": "^0.5.1", - "source-map-support": "^0.5.6", - "yn": "^2.0.0" - }, - "dependencies": { - "source-map": { - "version": "0.6.1", - "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", - "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", - "dev": true - }, - "source-map-support": { - "version": "0.5.9", - "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.9.tgz", - "integrity": "sha512-gR6Rw4MvUlYy83vP0vxoVNzM6t8MUXqNuRsuBmBHQDu1Fh6X015FrLdgoDKcNdkwGubozq0P4N0Q37UyFVr1EA==", + "version": "0.5.10", + "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.10.tgz", + "integrity": "sha512-YfQ3tQFTK/yzlGJuX8pTwa4tifQj4QS2Mj7UegOu8jAz59MqIiMGPXxQhVQiIMNzayuUSF/jEuVnfFF5JqybmQ==", "dev": true, "requires": { "buffer-from": "^1.0.0", @@ -14426,9 +12505,9 @@ "integrity": "sha512-4krF8scpejhaOgqzBEcGM7yDIEfi0/8+8zDRZhNZZ2kjmHJ4hv3zCbQWxoJGz1iw5U0Jl0nma13xzHXcncMavQ==" }, "tslint": { - "version": "5.11.0", - "resolved": "https://registry.npmjs.org/tslint/-/tslint-5.11.0.tgz", - "integrity": "sha1-mPMMAurjzecAYgHkwzywi0hYHu0=", + "version": "5.12.1", + "resolved": "https://registry.npmjs.org/tslint/-/tslint-5.12.1.tgz", + "integrity": "sha512-sfodBHOucFg6egff8d1BvuofoOQ/nOeYNfbp7LDlKBcLNrL3lmS5zoiDGyOMdT7YsEXAwWpTdAHwOGOc8eRZAw==", "dev": true, "requires": { "babel-code-frame": "^6.22.0", @@ -14491,9 +12570,9 @@ "dev": true }, "typedoc": { - "version": "0.12.0", - "resolved": "https://registry.npmjs.org/typedoc/-/typedoc-0.12.0.tgz", - "integrity": "sha512-dsdlaYZ7Je8JC+jQ3j2Iroe4uyD0GhqzADNUVyBRgLuytQDP/g0dPkAw5PdM/4drnmmJjRzSWW97FkKo+ITqQg==", + "version": "0.14.2", + "resolved": "https://registry.npmjs.org/typedoc/-/typedoc-0.14.2.tgz", + "integrity": "sha512-aEbgJXV8/KqaVhcedT7xG6d2r+mOvB5ep3eIz1KuB5sc4fDYXcepEEMdU7XSqLFO5hVPu0nllHi1QxX2h/QlpQ==", "dev": true, "requires": { "@types/fs-extra": "^5.0.3", @@ -14505,14 +12584,14 @@ "@types/shelljs": "^0.8.0", "fs-extra": "^7.0.0", "handlebars": "^4.0.6", - "highlight.js": "^9.0.0", + "highlight.js": "^9.13.1", "lodash": "^4.17.10", "marked": "^0.4.0", "minimatch": "^3.0.0", "progress": "^2.0.0", "shelljs": "^0.8.2", "typedoc-default-themes": "^0.5.0", - "typescript": "3.0.x" + "typescript": "3.2.x" } }, "typedoc-default-themes": { @@ -14522,9 +12601,9 @@ "dev": true }, "typescript": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-3.0.3.tgz", - "integrity": "sha512-kk80vLW9iGtjMnIv11qyxLqZm20UklzuR2tL0QAnDIygIUIemcZMxlMWudl9OOt76H3ntVzcTiddQ1/pAAJMYg==", + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-3.2.4.tgz", + "integrity": "sha512-0RNDbSdEokBeEAkgNbxJ+BLwSManFy9TeXz8uW+48j/xhEXv1ePME60olyzw2XzUqUBNAYFeJadIqAgNqIACwg==", "dev": true }, "typical": { @@ -14552,155 +12631,6 @@ } } }, - "uglifyjs-webpack-plugin": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/uglifyjs-webpack-plugin/-/uglifyjs-webpack-plugin-1.3.0.tgz", - "integrity": 
"sha512-ovHIch0AMlxjD/97j9AYovZxG5wnHOPkL7T1GKochBADp/Zwc44pEWNqpKl1Loupp1WhFg7SlYmHZRUfdAacgw==", - "dev": true, - "requires": { - "cacache": "^10.0.4", - "find-cache-dir": "^1.0.0", - "schema-utils": "^0.4.5", - "serialize-javascript": "^1.4.0", - "source-map": "^0.6.1", - "uglify-es": "^3.3.4", - "webpack-sources": "^1.1.0", - "worker-farm": "^1.5.2" - }, - "dependencies": { - "ajv": { - "version": "6.5.4", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.5.4.tgz", - "integrity": "sha512-4Wyjt8+t6YszqaXnLDfMmG/8AlO5Zbcsy3ATHncCzjW/NoPzAId8AK6749Ybjmdt+kUY1gP60fCu46oDxPv/mg==", - "dev": true, - "requires": { - "fast-deep-equal": "^2.0.1", - "fast-json-stable-stringify": "^2.0.0", - "json-schema-traverse": "^0.4.1", - "uri-js": "^4.2.2" - } - }, - "cacache": { - "version": "10.0.4", - "resolved": "https://registry.npmjs.org/cacache/-/cacache-10.0.4.tgz", - "integrity": "sha512-Dph0MzuH+rTQzGPNT9fAnrPmMmjKfST6trxJeK7NQuHRaVw24VzPRWTmg9MpcwOVQZO0E1FBICUlFeNaKPIfHA==", - "dev": true, - "requires": { - "bluebird": "^3.5.1", - "chownr": "^1.0.1", - "glob": "^7.1.2", - "graceful-fs": "^4.1.11", - "lru-cache": "^4.1.1", - "mississippi": "^2.0.0", - "mkdirp": "^0.5.1", - "move-concurrently": "^1.0.1", - "promise-inflight": "^1.0.1", - "rimraf": "^2.6.2", - "ssri": "^5.2.4", - "unique-filename": "^1.1.0", - "y18n": "^4.0.0" - } - }, - "commander": { - "version": "2.13.0", - "resolved": "https://registry.npmjs.org/commander/-/commander-2.13.0.tgz", - "integrity": "sha512-MVuS359B+YzaWqjCL/c+22gfryv+mCBPHAv3zyVI2GN8EY6IRP8VwtasXn8jyyhvvq84R4ImN1OKRtcbIasjYA==", - "dev": true - }, - "fast-deep-equal": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-2.0.1.tgz", - "integrity": "sha1-ewUhjd+WZ79/Nwv3/bLLFf3Qqkk=", - "dev": true - }, - "find-cache-dir": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/find-cache-dir/-/find-cache-dir-1.0.0.tgz", - "integrity": "sha1-kojj6ePMN0hxfTnq3hfPcfww7m8=", - "dev": true, - "requires": { - "commondir": "^1.0.1", - "make-dir": "^1.0.0", - "pkg-dir": "^2.0.0" - } - }, - "json-schema-traverse": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", - "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", - "dev": true - }, - "mississippi": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/mississippi/-/mississippi-2.0.0.tgz", - "integrity": "sha512-zHo8v+otD1J10j/tC+VNoGK9keCuByhKovAvdn74dmxJl9+mWHnx6EMsDN4lgRoMI/eYo2nchAxniIbUPb5onw==", - "dev": true, - "requires": { - "concat-stream": "^1.5.0", - "duplexify": "^3.4.2", - "end-of-stream": "^1.1.0", - "flush-write-stream": "^1.0.0", - "from2": "^2.1.0", - "parallel-transform": "^1.1.0", - "pump": "^2.0.1", - "pumpify": "^1.3.3", - "stream-each": "^1.1.0", - "through2": "^2.0.0" - } - }, - "pump": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/pump/-/pump-2.0.1.tgz", - "integrity": "sha512-ruPMNRkN3MHP1cWJc9OWr+T/xDP0jhXYCLfJcBuX54hhfIBnaQmAUMfDcG4DM5UMWByBbJY69QSphm3jtDKIkA==", - "dev": true, - "requires": { - "end-of-stream": "^1.1.0", - "once": "^1.3.1" - } - }, - "schema-utils": { - "version": "0.4.7", - "resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-0.4.7.tgz", - "integrity": "sha512-v/iwU6wvwGK8HbU9yi3/nhGzP0yGSuhQMzL6ySiec1FSrZZDkhm4noOSWzrNFo/jEc+SJY6jRTwuwbSXJPDUnQ==", - "dev": true, - "requires": { - "ajv": "^6.1.0", - "ajv-keywords": "^3.1.0" - } - }, - 
"source-map": { - "version": "0.6.1", - "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", - "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", - "dev": true - }, - "ssri": { - "version": "5.3.0", - "resolved": "https://registry.npmjs.org/ssri/-/ssri-5.3.0.tgz", - "integrity": "sha512-XRSIPqLij52MtgoQavH/x/dU1qVKtWUAAZeOHsR9c2Ddi4XerFy3mc1alf+dLJKl9EUIm/Ht+EowFkTUOA6GAQ==", - "dev": true, - "requires": { - "safe-buffer": "^5.1.1" - } - }, - "uglify-es": { - "version": "3.3.9", - "resolved": "https://registry.npmjs.org/uglify-es/-/uglify-es-3.3.9.tgz", - "integrity": "sha512-r+MU0rfv4L/0eeW3xZrd16t4NZfK8Ld4SWVglYBb7ez5uXFWHuVRs6xCTrf1yirs9a4j4Y27nn7SRfO6v67XsQ==", - "dev": true, - "requires": { - "commander": "~2.13.0", - "source-map": "~0.6.1" - } - }, - "y18n": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/y18n/-/y18n-4.0.0.tgz", - "integrity": "sha512-r9S/ZyXu/Xu9q1tYlpsLIsa3EeLXXk0VwlxqTcFRfg9EhMW+17kbt9G0NrgCmhGb5vT2hyhJZLfDGx+7+5Uj/w==", - "dev": true - } - } - }, "uid-number": { "version": "0.0.6", "resolved": "https://registry.npmjs.org/uid-number/-/uid-number-0.0.6.tgz", @@ -14796,13 +12726,13 @@ } }, "unique-stream": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/unique-stream/-/unique-stream-2.2.1.tgz", - "integrity": "sha1-WqADz76Uxf+GbE59ZouxxNuts2k=", + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/unique-stream/-/unique-stream-2.3.1.tgz", + "integrity": "sha512-2nY4TnBE70yoxHkDli7DMazpWiP7xMdCYqU2nBRO0UB+ZpEkGsSija7MvmvnZFUeC+mrgiUfcHSr3LmRFIg4+A==", "dev": true, "requires": { - "json-stable-stringify": "^1.0.0", - "through2-filter": "^2.0.0" + "json-stable-stringify-without-jsonify": "^1.0.1", + "through2-filter": "^3.0.0" } }, "universalify": { @@ -14848,12 +12778,6 @@ "resolved": "https://registry.npmjs.org/has-values/-/has-values-0.1.4.tgz", "integrity": "sha1-bWHeldkd/Km5oCCJrThL/49it3E=", "dev": true - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true } } }, @@ -14870,14 +12794,6 @@ "dev": true, "requires": { "punycode": "^2.1.0" - }, - "dependencies": { - "punycode": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz", - "integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==", - "dev": true - } } }, "urix": { @@ -14910,10 +12826,19 @@ "integrity": "sha512-cwESVXlO3url9YWlFW/TA9cshCEhtu7IKJ/p5soJ/gGpj7vbvFrAY/eIioQ6Dw23KjZhYgiIo8HOs1nQ2vr/oQ==", "dev": true }, + "user-home": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/user-home/-/user-home-2.0.0.tgz", + "integrity": "sha1-nHC/2Babwdy/SGBODwS4tJzenp8=", + "dev": true, + "requires": { + "os-homedir": "^1.0.0" + } + }, "util": { - "version": "0.10.4", - "resolved": "https://registry.npmjs.org/util/-/util-0.10.4.tgz", - "integrity": "sha512-0Pm9hTQ3se5ll1XihRic3FDIku70C+iHUdT/W926rSgHV5QgXsYbKZN8MSC3tJtSkhuROzvsQjAaFENRXr+19A==", + "version": "0.11.1", + "resolved": "https://registry.npmjs.org/util/-/util-0.11.1.tgz", + "integrity": "sha512-HShAsny+zS2TZfaXxD9tYj4HQGlBezXZMZuM/S5PKLLoZkShZiGk9o5CzukI1LVHZvjdvZ2Sj1aW/Ndn2NB/HQ==", "dev": true, "requires": { "inherits": "2.0.3" @@ -14942,9 +12867,9 @@ "dev": true }, "v8flags": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/v8flags/-/v8flags-3.1.1.tgz", - "integrity": 
"sha512-iw/1ViSEaff8NJ3HLyEjawk/8hjJib3E7pvG4pddVXfUg1983s3VGsiClDjhK64MQVDGqc1Q8r18S4VKQZS9EQ==", + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/v8flags/-/v8flags-3.1.2.tgz", + "integrity": "sha512-MtivA7GF24yMPte9Rp/BWGCYQNaUj86zeYxV/x2RRJMKagImbbv3u8iJC57lNhWLPcGLJmHcHmFWkNsplbbLWw==", "dev": true, "requires": { "homedir-polyfill": "^1.0.1" @@ -15018,426 +12943,83 @@ "remove-bom-buffer": "^3.0.0", "remove-bom-stream": "^1.2.0", "resolve-options": "^1.1.0", - "through2": "^2.0.0", - "to-through": "^2.0.0", - "value-or-function": "^3.0.0", - "vinyl": "^2.0.0", - "vinyl-sourcemap": "^1.1.0" - } - }, - "vinyl-sourcemap": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/vinyl-sourcemap/-/vinyl-sourcemap-1.1.0.tgz", - "integrity": "sha1-kqgAWTo4cDqM2xHYswCtS+Y7PhY=", - "dev": true, - "requires": { - "append-buffer": "^1.0.2", - "convert-source-map": "^1.5.0", - "graceful-fs": "^4.1.6", - "normalize-path": "^2.1.1", - "now-and-later": "^2.0.0", - "remove-bom-buffer": "^3.0.0", - "vinyl": "^2.0.0" - } - }, - "vinyl-sourcemaps-apply": { - "version": "0.2.1", - "resolved": "https://registry.npmjs.org/vinyl-sourcemaps-apply/-/vinyl-sourcemaps-apply-0.2.1.tgz", - "integrity": "sha1-q2VJ1h0XLCsbh75cUI0jnI74dwU=", - "dev": true, - "requires": { - "source-map": "^0.5.1" - } - }, - "vm-browserify": { - "version": "0.0.4", - "resolved": "https://registry.npmjs.org/vm-browserify/-/vm-browserify-0.0.4.tgz", - "integrity": "sha1-XX6kW7755Kb/ZflUOOCofDV9WnM=", - "dev": true, - "requires": { - "indexof": "0.0.1" - } - }, - "w3c-hr-time": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/w3c-hr-time/-/w3c-hr-time-1.0.1.tgz", - "integrity": "sha1-gqwr/2PZUOqeMYmlimViX+3xkEU=", - "dev": true, - "requires": { - "browser-process-hrtime": "^0.1.2" - } - }, - "walker": { - "version": "1.0.7", - "resolved": "https://registry.npmjs.org/walker/-/walker-1.0.7.tgz", - "integrity": "sha1-L3+bj9ENZ3JisYqITijRlhjgKPs=", - "dev": true, - "requires": { - "makeerror": "1.0.x" - } - }, - "watch": { - "version": "0.18.0", - "resolved": "https://registry.npmjs.org/watch/-/watch-0.18.0.tgz", - "integrity": "sha1-KAlUdsbffJDJYxOJkMClQj60uYY=", - "dev": true, - "requires": { - "exec-sh": "^0.2.0", - "minimist": "^1.2.0" - } - }, - "watchpack": { - "version": "1.6.0", - "resolved": "https://registry.npmjs.org/watchpack/-/watchpack-1.6.0.tgz", - "integrity": "sha512-i6dHe3EyLjMmDlU1/bGQpEw25XSjkJULPuAVKCbNRefQVq48yXKUpwg538F7AZTf9kyr57zj++pQFltUa5H7yA==", - "dev": true, - "requires": { - "chokidar": "^2.0.2", - "graceful-fs": "^4.1.2", - "neo-async": "^2.5.0" - }, - "dependencies": { - "anymatch": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-2.0.0.tgz", - "integrity": "sha512-5teOsQWABXHHBFP9y3skS5P3d/WfWXpv3FUpy+LorMrNYaT9pI4oLMQX7jzQ2KklNpGpWHzdCXTDT2Y3XGlZBw==", - "dev": true, - "requires": { - "micromatch": "^3.1.4", - "normalize-path": "^2.1.1" - } - }, - "arr-diff": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz", - "integrity": "sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=", - "dev": true - }, - "array-unique": { - "version": "0.3.2", - "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.3.2.tgz", - "integrity": "sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg=", - "dev": true - }, - "braces": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-2.3.2.tgz", - "integrity": "sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w==", 
- "dev": true, - "requires": { - "arr-flatten": "^1.1.0", - "array-unique": "^0.3.2", - "extend-shallow": "^2.0.1", - "fill-range": "^4.0.0", - "isobject": "^3.0.1", - "repeat-element": "^1.1.2", - "snapdragon": "^0.8.1", - "snapdragon-node": "^2.0.1", - "split-string": "^3.0.2", - "to-regex": "^3.0.1" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "chokidar": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-2.0.4.tgz", - "integrity": "sha512-z9n7yt9rOvIJrMhvDtDictKrkFHeihkNl6uWMmZlmL6tJtX9Cs+87oK+teBx+JIgzvbX3yZHT3eF8vpbDxHJXQ==", - "dev": true, - "requires": { - "anymatch": "^2.0.0", - "async-each": "^1.0.0", - "braces": "^2.3.0", - "fsevents": "^1.2.2", - "glob-parent": "^3.1.0", - "inherits": "^2.0.1", - "is-binary-path": "^1.0.0", - "is-glob": "^4.0.0", - "lodash.debounce": "^4.0.8", - "normalize-path": "^2.1.1", - "path-is-absolute": "^1.0.0", - "readdirp": "^2.0.0", - "upath": "^1.0.5" - } - }, - "expand-brackets": { - "version": "2.1.4", - "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-2.1.4.tgz", - "integrity": "sha1-t3c14xXOMPa27/D4OwQVGiJEliI=", - "dev": true, - "requires": { - "debug": "^2.3.3", - "define-property": "^0.2.5", - "extend-shallow": "^2.0.1", - "posix-character-classes": "^0.1.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "0.2.5", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-0.2.5.tgz", - "integrity": "sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=", - "dev": true, - "requires": { - "is-descriptor": "^0.1.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - }, - "is-accessor-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-0.1.6.tgz", - "integrity": "sha1-qeEss66Nh2cn7u84Q/igiXtcmNY=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-data-descriptor": { - "version": "0.1.4", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz", - "integrity": "sha1-C17mSDiOLIYCgueT8YVv7D8wG1Y=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-0.1.6.tgz", - "integrity": "sha512-avDYr0SB3DwO9zsMov0gKCESFYqCnE4hq/4z3TdUlukEy5t9C0YRq7HLrsN52NAcqXKaepeCD0n+B0arnVG3Hg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^0.1.6", - "is-data-descriptor": "^0.1.4", - "kind-of": "^5.0.0" - } - }, - "kind-of": { - "version": 
"5.1.0", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-5.1.0.tgz", - "integrity": "sha512-NGEErnH6F2vUuXDh+OlbcKW7/wOcfdRHaZ7VWtqCztfHri/++YKmP51OdWeGPuqCOba6kk2OTe5d02VmTB80Pw==", - "dev": true - } - } - }, - "extglob": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/extglob/-/extglob-2.0.4.tgz", - "integrity": "sha512-Nmb6QXkELsuBr24CJSkilo6UHHgbekK5UiZgfE6UHD3Eb27YC6oD+bhcT+tJ6cl8dmsgdQxnWlcry8ksBIBLpw==", - "dev": true, - "requires": { - "array-unique": "^0.3.2", - "define-property": "^1.0.0", - "expand-brackets": "^2.1.4", - "extend-shallow": "^2.0.1", - "fragment-cache": "^0.2.1", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-1.0.0.tgz", - "integrity": "sha1-dp66rz9KY6rTr56NMEybvnm/sOY=", - "dev": true, - "requires": { - "is-descriptor": "^1.0.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "fill-range": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-4.0.0.tgz", - "integrity": "sha1-1USBHUKPmOsGpj3EAtJAPDKMOPc=", - "dev": true, - "requires": { - "extend-shallow": "^2.0.1", - "is-number": "^3.0.0", - "repeat-string": "^1.6.1", - "to-regex-range": "^2.1.0" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "glob-parent": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-3.1.0.tgz", - "integrity": "sha1-nmr2KZ2NO9K9QEMIMr0RPfkGxa4=", - "dev": true, - "requires": { - "is-glob": "^3.1.0", - "path-dirname": "^1.0.0" - }, - "dependencies": { - "is-glob": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-3.1.0.tgz", - "integrity": "sha1-e6WuJCF4BKxwcHuWkiVnSGzD6Eo=", - "dev": true, - "requires": { - "is-extglob": "^2.1.0" - } - } - } - }, - "is-accessor-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-1.0.0.tgz", - "integrity": "sha512-m5hnHTkcVsPfqx3AKlyttIPb7J+XykHvJP2B9bZDjlhLIoEq4XoK64Vg7boZlVWYK6LUY94dYPEE7Lh0ZkZKcQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } - }, - "is-data-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-1.0.0.tgz", - "integrity": "sha512-jbRXy1FmtAoCjQkVmIVYwuuqDFUbaOeDjmed1tOGPrsMhtJA4rD9tkgA0F1qJ3gRFRXcHYVkdeaP50Q5rE/jLQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } - }, - "is-descriptor": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-1.0.2.tgz", - "integrity": "sha512-2eis5WqQGV7peooDyLmNEPUrps9+SXX5c9pL3xEB+4e9HnGuDa7mB7kHxHw4CbqS9k1T2hOH3miL8n8WtiYVtg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^1.0.0", - "is-data-descriptor": "^1.0.0", - "kind-of": "^6.0.2" - } - }, - "is-extglob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", - "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", - "dev": true - 
}, - "is-glob": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.0.tgz", - "integrity": "sha1-lSHHaEXMJhCoUgPd8ICpWML/q8A=", - "dev": true, - "requires": { - "is-extglob": "^2.1.1" - } - }, - "is-number": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-3.0.0.tgz", - "integrity": "sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true - }, - "micromatch": { - "version": "3.1.10", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-3.1.10.tgz", - "integrity": "sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg==", - "dev": true, - "requires": { - "arr-diff": "^4.0.0", - "array-unique": "^0.3.2", - "braces": "^2.3.1", - "define-property": "^2.0.2", - "extend-shallow": "^3.0.2", - "extglob": "^2.0.4", - "fragment-cache": "^0.2.1", - "kind-of": "^6.0.2", - "nanomatch": "^1.2.9", - "object.pick": "^1.3.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.2" - } - } + "through2": "^2.0.0", + "to-through": "^2.0.0", + "value-or-function": "^3.0.0", + "vinyl": "^2.0.0", + "vinyl-sourcemap": "^1.1.0" + } + }, + "vinyl-sourcemap": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/vinyl-sourcemap/-/vinyl-sourcemap-1.1.0.tgz", + "integrity": "sha1-kqgAWTo4cDqM2xHYswCtS+Y7PhY=", + "dev": true, + "requires": { + "append-buffer": "^1.0.2", + "convert-source-map": "^1.5.0", + "graceful-fs": "^4.1.6", + "normalize-path": "^2.1.1", + "now-and-later": "^2.0.0", + "remove-bom-buffer": "^3.0.0", + "vinyl": "^2.0.0" + } + }, + "vinyl-sourcemaps-apply": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/vinyl-sourcemaps-apply/-/vinyl-sourcemaps-apply-0.2.1.tgz", + "integrity": "sha1-q2VJ1h0XLCsbh75cUI0jnI74dwU=", + "dev": true, + "requires": { + "source-map": "^0.5.1" + } + }, + "vm-browserify": { + "version": "0.0.4", + "resolved": "https://registry.npmjs.org/vm-browserify/-/vm-browserify-0.0.4.tgz", + "integrity": "sha1-XX6kW7755Kb/ZflUOOCofDV9WnM=", + "dev": true, + "requires": { + "indexof": "0.0.1" + } + }, + "w3c-hr-time": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/w3c-hr-time/-/w3c-hr-time-1.0.1.tgz", + "integrity": "sha1-gqwr/2PZUOqeMYmlimViX+3xkEU=", + "dev": true, + "requires": { + "browser-process-hrtime": "^0.1.2" + } + }, + "walker": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/walker/-/walker-1.0.7.tgz", + "integrity": "sha1-L3+bj9ENZ3JisYqITijRlhjgKPs=", + "dev": true, + "requires": { + "makeerror": "1.0.x" + } + }, + "watch": { + "version": "0.18.0", + "resolved": "https://registry.npmjs.org/watch/-/watch-0.18.0.tgz", + "integrity": "sha1-KAlUdsbffJDJYxOJkMClQj60uYY=", + "dev": true, + "requires": { + "exec-sh": "^0.2.0", + "minimist": "^1.2.0" + } + }, + "watchpack": { 
+ "version": "1.6.0", + "resolved": "https://registry.npmjs.org/watchpack/-/watchpack-1.6.0.tgz", + "integrity": "sha512-i6dHe3EyLjMmDlU1/bGQpEw25XSjkJULPuAVKCbNRefQVq48yXKUpwg538F7AZTf9kyr57zj++pQFltUa5H7yA==", + "dev": true, + "requires": { + "chokidar": "^2.0.2", + "graceful-fs": "^4.1.2", + "neo-async": "^2.5.0" } }, "wcwidth": { @@ -15449,6 +13031,12 @@ "defaults": "^1.0.3" } }, + "web-stream-tools": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/web-stream-tools/-/web-stream-tools-0.0.1.tgz", + "integrity": "sha512-MZUYhvTAMMy1u07OJL2pyp/tdrIu15fRJlGgnfvCQVXBS4cBNbIV1+6veYfVhTfnq0ZLispgx4nv17QxpuX+6w==", + "dev": true + }, "webidl-conversions": { "version": "4.0.2", "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-4.0.2.tgz", @@ -15456,17 +13044,17 @@ "dev": true }, "webpack": { - "version": "4.23.1", - "resolved": "https://registry.npmjs.org/webpack/-/webpack-4.23.1.tgz", - "integrity": "sha512-iE5Cu4rGEDk7ONRjisTOjVHv3dDtcFfwitSxT7evtYj/rANJpt1OuC/Kozh1pBa99AUBr1L/LsaNB+D9Xz3CEg==", + "version": "4.29.0", + "resolved": "https://registry.npmjs.org/webpack/-/webpack-4.29.0.tgz", + "integrity": "sha512-pxdGG0keDBtamE1mNvT5zyBdx+7wkh6mh7uzMOo/uRQ/fhsdj5FXkh/j5mapzs060forql1oXqXN9HJGju+y7w==", "dev": true, "requires": { - "@webassemblyjs/ast": "1.7.10", - "@webassemblyjs/helper-module-context": "1.7.10", - "@webassemblyjs/wasm-edit": "1.7.10", - "@webassemblyjs/wasm-parser": "1.7.10", - "acorn": "^5.6.2", - "acorn-dynamic-import": "^3.0.0", + "@webassemblyjs/ast": "1.7.11", + "@webassemblyjs/helper-module-context": "1.7.11", + "@webassemblyjs/wasm-edit": "1.7.11", + "@webassemblyjs/wasm-parser": "1.7.11", + "acorn": "^6.0.5", + "acorn-dynamic-import": "^4.0.0", "ajv": "^6.1.0", "ajv-keywords": "^3.1.0", "chrome-trace-event": "^1.0.0", @@ -15482,309 +13070,17 @@ "node-libs-browser": "^2.0.0", "schema-utils": "^0.4.4", "tapable": "^1.1.0", - "uglifyjs-webpack-plugin": "^1.2.4", + "terser-webpack-plugin": "^1.1.0", "watchpack": "^1.5.0", "webpack-sources": "^1.3.0" }, "dependencies": { - "ajv": { - "version": "6.5.4", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.5.4.tgz", - "integrity": "sha512-4Wyjt8+t6YszqaXnLDfMmG/8AlO5Zbcsy3ATHncCzjW/NoPzAId8AK6749Ybjmdt+kUY1gP60fCu46oDxPv/mg==", - "dev": true, - "requires": { - "fast-deep-equal": "^2.0.1", - "fast-json-stable-stringify": "^2.0.0", - "json-schema-traverse": "^0.4.1", - "uri-js": "^4.2.2" - } - }, - "arr-diff": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz", - "integrity": "sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=", - "dev": true - }, - "array-unique": { - "version": "0.3.2", - "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.3.2.tgz", - "integrity": "sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg=", - "dev": true - }, - "braces": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-2.3.2.tgz", - "integrity": "sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w==", - "dev": true, - "requires": { - "arr-flatten": "^1.1.0", - "array-unique": "^0.3.2", - "extend-shallow": "^2.0.1", - "fill-range": "^4.0.0", - "isobject": "^3.0.1", - "repeat-element": "^1.1.2", - "snapdragon": "^0.8.1", - "snapdragon-node": "^2.0.1", - "split-string": "^3.0.2", - "to-regex": "^3.0.1" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": 
"sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "expand-brackets": { - "version": "2.1.4", - "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-2.1.4.tgz", - "integrity": "sha1-t3c14xXOMPa27/D4OwQVGiJEliI=", - "dev": true, - "requires": { - "debug": "^2.3.3", - "define-property": "^0.2.5", - "extend-shallow": "^2.0.1", - "posix-character-classes": "^0.1.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "0.2.5", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-0.2.5.tgz", - "integrity": "sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=", - "dev": true, - "requires": { - "is-descriptor": "^0.1.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - }, - "is-accessor-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-0.1.6.tgz", - "integrity": "sha1-qeEss66Nh2cn7u84Q/igiXtcmNY=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-data-descriptor": { - "version": "0.1.4", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz", - "integrity": "sha1-C17mSDiOLIYCgueT8YVv7D8wG1Y=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-0.1.6.tgz", - "integrity": "sha512-avDYr0SB3DwO9zsMov0gKCESFYqCnE4hq/4z3TdUlukEy5t9C0YRq7HLrsN52NAcqXKaepeCD0n+B0arnVG3Hg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^0.1.6", - "is-data-descriptor": "^0.1.4", - "kind-of": "^5.0.0" - } - }, - "kind-of": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-5.1.0.tgz", - "integrity": "sha512-NGEErnH6F2vUuXDh+OlbcKW7/wOcfdRHaZ7VWtqCztfHri/++YKmP51OdWeGPuqCOba6kk2OTe5d02VmTB80Pw==", - "dev": true - } - } - }, - "extglob": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/extglob/-/extglob-2.0.4.tgz", - "integrity": "sha512-Nmb6QXkELsuBr24CJSkilo6UHHgbekK5UiZgfE6UHD3Eb27YC6oD+bhcT+tJ6cl8dmsgdQxnWlcry8ksBIBLpw==", - "dev": true, - "requires": { - "array-unique": "^0.3.2", - "define-property": "^1.0.0", - "expand-brackets": "^2.1.4", - "extend-shallow": "^2.0.1", - "fragment-cache": "^0.2.1", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-1.0.0.tgz", - "integrity": "sha1-dp66rz9KY6rTr56NMEybvnm/sOY=", - "dev": true, - "requires": { - "is-descriptor": "^1.0.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": 
"https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "fast-deep-equal": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-2.0.1.tgz", - "integrity": "sha1-ewUhjd+WZ79/Nwv3/bLLFf3Qqkk=", - "dev": true - }, - "fill-range": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-4.0.0.tgz", - "integrity": "sha1-1USBHUKPmOsGpj3EAtJAPDKMOPc=", - "dev": true, - "requires": { - "extend-shallow": "^2.0.1", - "is-number": "^3.0.0", - "repeat-string": "^1.6.1", - "to-regex-range": "^2.1.0" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "is-accessor-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-1.0.0.tgz", - "integrity": "sha512-m5hnHTkcVsPfqx3AKlyttIPb7J+XykHvJP2B9bZDjlhLIoEq4XoK64Vg7boZlVWYK6LUY94dYPEE7Lh0ZkZKcQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } - }, - "is-data-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-1.0.0.tgz", - "integrity": "sha512-jbRXy1FmtAoCjQkVmIVYwuuqDFUbaOeDjmed1tOGPrsMhtJA4rD9tkgA0F1qJ3gRFRXcHYVkdeaP50Q5rE/jLQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } - }, - "is-descriptor": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-1.0.2.tgz", - "integrity": "sha512-2eis5WqQGV7peooDyLmNEPUrps9+SXX5c9pL3xEB+4e9HnGuDa7mB7kHxHw4CbqS9k1T2hOH3miL8n8WtiYVtg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^1.0.0", - "is-data-descriptor": "^1.0.0", - "kind-of": "^6.0.2" - } - }, - "is-number": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-3.0.0.tgz", - "integrity": "sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - }, - "json-schema-traverse": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", - "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", - "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", + "acorn": { + "version": "6.0.5", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-6.0.5.tgz", + "integrity": "sha512-i33Zgp3XWtmZBMNvCr4azvOFeWVw1Rk6p3hfi3LUDvIFraOMywb1kAtrbi+med14m4Xfpqm3zRZMT+c0FNE7kg==", "dev": true }, - "micromatch": { - "version": "3.1.10", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-3.1.10.tgz", - 
"integrity": "sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg==", - "dev": true, - "requires": { - "arr-diff": "^4.0.0", - "array-unique": "^0.3.2", - "braces": "^2.3.1", - "define-property": "^2.0.2", - "extend-shallow": "^3.0.2", - "extglob": "^2.0.4", - "fragment-cache": "^0.2.1", - "kind-of": "^6.0.2", - "nanomatch": "^1.2.9", - "object.pick": "^1.3.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.2" - } - }, "schema-utils": { "version": "0.4.7", "resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-0.4.7.tgz", @@ -15825,9 +13121,9 @@ } }, "whatwg-mimetype": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-2.2.0.tgz", - "integrity": "sha512-5YSO1nMd5D1hY3WzAQV3PzZL83W3YeyR1yW9PcH26Weh1t+Vzh9B6XkDh7aXm83HBZ4nSMvkjvN2H2ySWIvBgw==", + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-2.3.0.tgz", + "integrity": "sha512-M4yMwr6mAnQz76TbJm914+gPpB/nCwvZbJU28cUD6dR004SAxDLOOSUaB1JDRqLtaOV/vi0IC5lEAGFgrjGv/g==", "dev": true }, "whatwg-url": { @@ -15851,9 +13147,9 @@ } }, "which-module": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/which-module/-/which-module-2.0.0.tgz", - "integrity": "sha1-2e8H3Od7mQK4o6j6SzHD4/fm6Ho=", + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/which-module/-/which-module-1.0.0.tgz", + "integrity": "sha1-u6Y8qGGUiZT/MHc2CJ47lgJsKk8=", "dev": true }, "wide-align": { @@ -15891,7 +13187,7 @@ }, "wrap-ansi": { "version": "2.1.0", - "resolved": "http://registry.npmjs.org/wrap-ansi/-/wrap-ansi-2.1.0.tgz", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-2.1.0.tgz", "integrity": "sha1-2Pw9KE3QV5T+hJc8rs3Rz4JP3YU=", "dev": true, "requires": { @@ -15906,9 +13202,9 @@ "dev": true }, "write-file-atomic": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/write-file-atomic/-/write-file-atomic-2.3.0.tgz", - "integrity": "sha512-xuPeK4OdjWqtfi59ylvVL0Yn35SF3zgcAcv7rBPFHVaEapaDr4GdGgm3j7ckTwH9wHL7fGmgfAnb0+THrHb8tA==", + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/write-file-atomic/-/write-file-atomic-2.4.2.tgz", + "integrity": "sha512-s0b6vB3xIVRLWywa6X9TOMA7k9zio0TMOsl9ZnDkliA/cfJlpHXAscj0gbHVJiTdIuAYpIyqS5GW91fqm6gG5g==", "dev": true, "requires": { "graceful-fs": "^4.1.11", @@ -15984,15 +13280,6 @@ "resolved": "https://registry.npmjs.org/pify/-/pify-2.3.0.tgz", "integrity": "sha1-7RQaasBDqEnqWISY59yosVMw6Qw=", "dev": true - }, - "user-home": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/user-home/-/user-home-2.0.0.tgz", - "integrity": "sha1-nHC/2Babwdy/SGBODwS4tJzenp8=", - "dev": true, - "requires": { - "os-homedir": "^1.0.0" - } } } }, @@ -16014,16 +13301,10 @@ }, "xmlbuilder": { "version": "9.0.7", - "resolved": "http://registry.npmjs.org/xmlbuilder/-/xmlbuilder-9.0.7.tgz", + "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-9.0.7.tgz", "integrity": "sha1-Ey7mPS7FVlxVfiD0wi35rKaGsQ0=", "dev": true }, - "xregexp": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/xregexp/-/xregexp-4.0.0.tgz", - "integrity": "sha512-PHyM+sQouu7xspQQwELlGwwd05mXUFqwFYfqPO0cC7x4fxyHnnuetmQr6CjJiafIDoH4MogHb9dOoJzR/Y4rFg==", - "dev": true - }, "xtend": { "version": "4.0.1", "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.1.tgz", @@ -16061,52 +13342,21 @@ "which-module": "^1.0.0", "y18n": "^3.2.1", "yargs-parser": "^5.0.0" - }, - "dependencies": { - "camelcase": { - "version": 
"3.0.0", - "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-3.0.0.tgz", - "integrity": "sha1-MvxLn82vhF/N9+c7uXysImHwqwo=", - "dev": true - }, - "which-module": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/which-module/-/which-module-1.0.0.tgz", - "integrity": "sha1-u6Y8qGGUiZT/MHc2CJ47lgJsKk8=", - "dev": true - }, - "yargs-parser": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-5.0.0.tgz", - "integrity": "sha1-J17PDX/+Bcd+ZOfIbkzZS/DhIoo=", - "dev": true, - "requires": { - "camelcase": "^3.0.0" - } - } } }, "yargs-parser": { - "version": "9.0.2", - "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-9.0.2.tgz", - "integrity": "sha1-nM9qQ0YP5O1Aqbto9I1DuKaMwHc=", + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-5.0.0.tgz", + "integrity": "sha1-J17PDX/+Bcd+ZOfIbkzZS/DhIoo=", "dev": true, "requires": { - "camelcase": "^4.1.0" - }, - "dependencies": { - "camelcase": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-4.1.0.tgz", - "integrity": "sha1-1UVjW+HjPFQmScaRc+Xeas+uNN0=", - "dev": true - } + "camelcase": "^3.0.0" } }, "yn": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/yn/-/yn-2.0.0.tgz", - "integrity": "sha1-5a2ryKz0CPY4X8dklWhMiOavaJo=", + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/yn/-/yn-3.0.0.tgz", + "integrity": "sha512-+Wo/p5VRfxUgBUGy2j/6KX2mj9AYJWOHuhMjMcbBFc3y54o9/4buK1ksBvuiK01C3kby8DH9lSmJdSxw+4G/2Q==", "dev": true } } diff --git a/js/package.json b/js/package.json index b3f3f162ad0f1..22a1df9032aa6 100644 --- a/js/package.json +++ b/js/package.json @@ -8,10 +8,10 @@ }, "scripts": { "lerna": "lerna", - "test": "gulp test", - "build": "gulp build", - "clean": "gulp clean", - "debug": "gulp debug", + "test": "NODE_NO_WARNINGS=1 gulp test", + "build": "NODE_NO_WARNINGS=1 gulp build", + "clean": "NODE_NO_WARNINGS=1 gulp clean", + "debug": "NODE_NO_WARNINGS=1 gulp debug", "perf": "node ./perf/index.js", "test:integration": "node ./bin/integration.js --mode validate", "create:perfdata": "python ./test/data/tables/generate.py ./test/data/tables/tracks.arrow", @@ -19,11 +19,14 @@ "clean:all": "run-p clean clean:testdata", "clean:testdata": "gulp clean:testdata", "create:testdata": "gulp create:testdata", - "test:coverage": "gulp test -t ts --coverage", - "doc": "shx rm -rf ./doc && typedoc --mode file --out doc src/Arrow.ts", - "lint": "run-p lint:*", + "test:coverage": "gulp test -t src --coverage", + "doc": "shx rm -rf ./doc && typedoc --options typedoc.js", + "lint": "run-p lint:src lint:test", + "lint:ci": "run-p lint:src:ci lint:test:ci", "lint:src": "tslint --fix --project -p tsconfig.json -c tslint.json \"src/**/*.ts\"", "lint:test": "tslint --fix --project -p test/tsconfig.json -c tslint.json \"test/**/*.ts\"", + "lint:src:ci": "tslint --project -p tsconfig.json -c tslint.json \"src/**/*.ts\"", + "lint:test:ci": "tslint --project -p test/tsconfig.json -c tslint.json \"test/**/*.ts\"", "prepublishOnly": "echo \"Error: do 'npm run release' instead of 'npm publish'\" && exit 1", "version": "npm install && npm run clean:all" }, @@ -50,99 +53,65 @@ "tsconfig", "README.md", "gulpfile.js", - "npm-release.sh" + "npm-release.sh", + "jest.config.js", + "jest.coverage.config.js" ], "dependencies": { - "@types/flatbuffers": "1.9.0", - "@types/node": "10.12.0", - "@types/text-encoding-utf-8": "1.0.1", + "@types/flatbuffers": "^1.9.0", + "@types/node": "^10.12.18", + 
"@types/text-encoding-utf-8": "^1.0.1", "command-line-args": "5.0.2", "command-line-usage": "5.0.5", - "flatbuffers": "1.10.2", + "flatbuffers": "^1.10.2", "json-bignum": "0.0.3", + "pad-left": "2.1.0", "text-encoding-utf-8": "1.0.2", - "tslib": "1.9.3" + "tslib": "^1.9.3" }, "devDependencies": { - "@std/esm": "0.26.0", + "@mattiasbuelens/web-streams-polyfill": "0.2.1", "@types/glob": "7.1.1", - "@types/jest": "23.3.5", + "@types/jest": "23.3.13", + "async-done": "1.3.1", "benchmark": "2.1.4", "coveralls": "3.0.2", "del": "3.0.0", + "esm": "3.1.4", "glob": "7.1.3", - "google-closure-compiler": "20181008.0.0", - "gulp": "next", - "gulp-json-transform": "0.4.5", + "google-closure-compiler": "20190121.0.0", + "gulp": "4.0.0", + "gulp-json-transform": "0.4.6", "gulp-rename": "1.4.0", "gulp-sourcemaps": "2.6.4", - "gulp-typescript": "5.0.0-alpha.3", - "ix": "2.3.5", + "gulp-typescript": "5.0.0", + "ix": "2.5.1", "jest": "23.6.0", "jest-environment-node-debug": "2.0.0", + "jest-silent-reporter": "0.1.1", "json": "9.0.6", - "lerna": "3.4.3", - "lint-staged": "7.3.0", - "merge2": "1.2.3", + "lerna": "3.10.7", + "memfs": "2.15.0", "mkdirp": "0.5.1", - "npm-run-all": "4.1.3", - "pump": "3.0.0", - "rimraf": "2.6.2", + "multistream": "2.1.1", + "npm-run-all": "4.1.5", + "randomatic": "3.1.1", + "rimraf": "2.6.3", "rxjs": "5.5.11", "shx": "0.3.2", "source-map-loader": "0.2.4", - "terser-webpack-plugin": "1.1.0", + "terser-webpack-plugin": "1.2.1", "trash": "4.3.0", - "ts-jest": "22.4.6", - "ts-node": "7.0.1", - "tslint": "5.11.0", - "typedoc": "0.12", - "typescript": "3.0.3", - "webpack": "4.23.1", + "ts-jest": "23.10.5", + "ts-node": "8.0.2", + "tslint": "5.12.1", + "typedoc": "0.14.2", + "typescript": "3.2.4", + "web-stream-tools": "0.0.1", + "webpack": "4.29.0", "xml2js": "0.4.19" }, - "@std/esm": { - "warnings": false - }, - "lint-staged": { - "*.@(ts)": [ - "tslint --fix", - "git add" - ] - }, - "jest": { - "verbose": false, - "testEnvironment": "node", - "globals": { - "ts-jest": { - "skipBabel": true, - "tsConfigFile": "test/tsconfig.json" - } - }, - "roots": [ - "/test/" - ], - "moduleFileExtensions": [ - "js", - "ts", - "tsx" - ], - "coverageReporters": [ - "lcov" - ], - "coveragePathIgnorePatterns": [ - "fb\\/(File|Message|Schema|Tensor)_generated\\.(js|ts)$", - "test\\/.*\\.(ts|tsx|js)$", - "/node_modules/" - ], - "transform": { - ".(ts|tsx)": "./node_modules/ts-jest/preprocessor.js", - ".(js|jsx)": "./node_modules/babel-jest/build/index.js" - }, - "transformIgnorePatterns": [ - "/node_modules/", - "/(es2015|esnext)/umd/" - ], - "testRegex": "(.*(-|\\.)(test|spec)s?)\\.(ts|tsx|js)$" + "engines": { + "node": ">=11.0" } } diff --git a/js/perf/index.js b/js/perf/index.js index 2c07591925328..0e9c2bd689aae 100644 --- a/js/perf/index.js +++ b/js/perf/index.js @@ -16,10 +16,10 @@ // under the License. 
// Use the ES5 UMD target as perf baseline -// const { predicate, Table, read: readBatches } = require('../targets/es5/umd'); -// const { predicate, Table, read: readBatches } = require('../targets/es5/cjs'); -// const { predicate, Table, read: readBatches } = require('../targets/es2015/umd'); -const { predicate, Table, read: readBatches } = require('../targets/es2015/cjs'); +// const { predicate, Table, RecordBatchReader } = require('../targets/es5/umd'); +// const { predicate, Table, RecordBatchReader } = require('../targets/es5/cjs'); +// const { predicate, Table, RecordBatchReader } = require('../targets/es2015/umd'); +const { predicate, Table, RecordBatchReader } = require('../targets/es2015/cjs'); const { col } = predicate; const Benchmark = require('benchmark'); @@ -91,7 +91,7 @@ function createReadBatchesTest(name, buffers) { return { async: true, name: `readBatches\n`, - fn() { for (recordBatch of readBatches(buffers)) {} } + fn() { for (recordBatch of RecordBatchReader.from(buffers)) {} } }; } @@ -139,34 +139,36 @@ function createDataFrameDirectCountTest(table, column, test, value) { let sum, colidx = table.schema.fields.findIndex((c)=>c.name === column); if (test == 'gt') { - op = function () { + op = () => { sum = 0; - let batches = table.batches; + let batches = table.chunks; let numBatches = batches.length; for (let batchIndex = -1; ++batchIndex < numBatches;) { // load batches const batch = batches[batchIndex]; const vector = batch.getChildAt(colidx); // yield all indices - for (let index = -1; ++index < batch.length;) { + for (let index = -1, length = batch.length; ++index < length;) { sum += (vector.get(index) >= value); } } + return sum; } } else if (test == 'eq') { - op = function() { + op = () => { sum = 0; - let batches = table.batches; + let batches = table.chunks; let numBatches = batches.length; for (let batchIndex = -1; ++batchIndex < numBatches;) { // load batches const batch = batches[batchIndex]; const vector = batch.getChildAt(colidx); // yield all indices - for (let index = -1; ++index < batch.length;) { + for (let index = -1, length = batch.length; ++index < length;) { sum += (vector.get(index) === value); } } + return sum; } } else { throw new Error(`Unrecognized test "${test}"`); diff --git a/js/src/Arrow.dom.ts b/js/src/Arrow.dom.ts new file mode 100644 index 0000000000000..f9178df91e782 --- /dev/null +++ b/js/src/Arrow.dom.ts @@ -0,0 +1,86 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
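+
+// Browser/DOM entry point: installs the WHATWG stream adapter and attaches the
+// `throughDOM` transform factories onto `RecordBatchReader`/`RecordBatchWriter`
+// before re-exporting the public API. A hedged usage sketch (the URL and the
+// availability of `fetch`/DOM streams are assumptions, not part of this file):
+//
+//   import { RecordBatchReader } from './Arrow.dom';
+//   const reader = await RecordBatchReader.from((await fetch('data.arrow')).body!);
+//   for await (const batch of reader) { /* consume each RecordBatch */ }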
+ +import streamAdapters from './io/adapters'; +import { RecordBatchReader } from './ipc/reader'; +import { RecordBatchWriter } from './ipc/writer'; +import { toDOMStream } from './ipc/whatwg/iterable'; +import { recordBatchReaderThroughDOMStream } from './ipc/whatwg/reader'; +import { recordBatchWriterThroughDOMStream } from './ipc/whatwg/writer'; + +streamAdapters.toDOMStream = toDOMStream; +RecordBatchReader['throughDOM'] = recordBatchReaderThroughDOMStream; +RecordBatchWriter['throughDOM'] = recordBatchWriterThroughDOMStream; + +export { + ArrowType, DateUnit, IntervalUnit, MessageHeader, MetadataVersion, Precision, TimeUnit, Type, UnionMode, VectorType, + Data, + DataType, + Null, + Bool, + Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64, + Float, Float16, Float32, Float64, + Utf8, + Binary, + FixedSizeBinary, + Date_, DateDay, DateMillisecond, + Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond, + Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond, + Decimal, + List, + Struct, + Union, DenseUnion, SparseUnion, + Dictionary, + Interval, IntervalDayTime, IntervalYearMonth, + FixedSizeList, + Map_, + Table, + Column, + Schema, Field, + Visitor, + Vector, + BaseVector, + BinaryVector, + BoolVector, + Chunked, + DateVector, DateDayVector, DateMillisecondVector, + DecimalVector, + DictionaryVector, + FixedSizeBinaryVector, + FixedSizeListVector, + FloatVector, Float16Vector, Float32Vector, Float64Vector, + IntervalVector, IntervalDayTimeVector, IntervalYearMonthVector, + IntVector, Int8Vector, Int16Vector, Int32Vector, Int64Vector, Uint8Vector, Uint16Vector, Uint32Vector, Uint64Vector, + ListVector, + MapVector, + NullVector, + StructVector, + TimestampVector, TimestampSecondVector, TimestampMillisecondVector, TimestampMicrosecondVector, TimestampNanosecondVector, + TimeVector, TimeSecondVector, TimeMillisecondVector, TimeMicrosecondVector, TimeNanosecondVector, + UnionVector, DenseUnionVector, SparseUnionVector, + Utf8Vector, + ByteStream, AsyncByteStream, AsyncByteQueue, ReadableSource, WritableSink, + RecordBatchReader, RecordBatchFileReader, RecordBatchStreamReader, AsyncRecordBatchFileReader, AsyncRecordBatchStreamReader, + RecordBatchWriter, RecordBatchFileWriter, RecordBatchStreamWriter, RecordBatchJSONWriter, + MessageReader, AsyncMessageReader, JSONMessageReader, + Message, + RecordBatch, + ArrowJSONLike, FileHandle, Readable, Writable, ReadableWritable, ReadableDOMStreamOptions, + DataFrame, FilteredDataFrame, CountByResult, BindFunc, NextFunc, + predicate, + util +} from './Arrow'; diff --git a/js/src/Arrow.externs.js b/js/src/Arrow.externs.js deleted file mode 100644 index f01ea5cedc406..0000000000000 --- a/js/src/Arrow.externs.js +++ /dev/null @@ -1,812 +0,0 @@ -// @ts-nocheck -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -/* tslint:disable */ - -/** - * @fileoverview Closure Compiler externs for Arrow - * @externs - * @suppress {duplicate,checkTypes} - */ -/** @type {symbol} */ -Symbol.iterator; -/** @type {symbol} */ -Symbol.asyncIterator; - -var Table = function() {}; -/** @type {?} */ -Table.from = function() {}; -/** @type {?} */ -Table.fromAsync = function() {}; -/** @type {?} */ -Table.fromStruct = function() {}; -/** @type {?} */ -Table.empty = function() {}; -/** @type {?} */ -Table.prototype.schema; -/** @type {?} */ -Table.prototype.length; -/** @type {?} */ -Table.prototype.numCols; -/** @type {?} */ -Table.prototype.get; -/** @type {?} */ -Table.prototype.getColumn; -/** @type {?} */ -Table.prototype.getColumnAt; -/** @type {?} */ -Table.prototype.getColumnIndex; -/** @type {?} */ -Table.prototype.toArray; -/** @type {?} */ -Table.prototype.select; -/** @type {?} */ -Table.prototype.rowsToString; -/** @type {?} */ -Table.prototype.batchesUnion; -/** @type {?} */ -Table.prototype.batches; -/** @type {?} */ -Table.prototype.countBy; -/** @type {?} */ -Table.prototype.scan; -/** @type {?} */ -Table.prototype.serialize; - -var CountByResult = function() {}; -/** @type {?} */ -CountByResult.prototype.asJSON; - -var col = function () {}; -var lit = function () {}; -var and = function () {}; -var or = function () {}; -var custom = function () {}; - -var Value = function() {}; -/** @type {?} */ -Value.prototype.ge; -/** @type {?} */ -Value.prototype.le; -/** @type {?} */ -Value.prototype.eq; -/** @type {?} */ -Value.prototype.lt; -/** @type {?} */ -Value.prototype.gt; -/** @type {?} */ -Value.prototype.ne; - -var Col = function() {}; -/** @type {?} */ -Col.prototype.bind; -var CombinationPredicate = function () {}; -/** @type {?} */ -CombinationPredicate.prototype.children; -var Or = function() {}; -var And = function() {}; -var Not = function() {}; -var GTeq = function () {}; -/** @type {?} */ -GTeq.prototype.and; -/** @type {?} */ -GTeq.prototype.or; -var LTeq = function () {}; -/** @type {?} */ -LTeq.prototype.and; -/** @type {?} */ -LTeq.prototype.or; -var Equals = function () {}; -/** @type {?} */ -Equals.prototype.and; -/** @type {?} */ -Equals.prototype.or; -var Predicate = function() {}; -/** @type {?} */ -Predicate.prototype.bind; -/** @type {?} */ -Predicate.prototype.and; -/** @type {?} */ -Predicate.prototype.or; -/** @type {?} */ -Predicate.prototype.not; -/** @type {?} */ -Predicate.prototype.ands; -var Literal = function() {}; - -var PipeIterator = function() {}; -/** @type {?} */ -PipeIterator.prototype.pipe; - -var AsyncPipeIterator = function() {}; -/** @type {?} */ -AsyncPipeIterator.prototype.pipe; - -var RecordBatch = function() {}; -/** @type {?} */ -RecordBatch.from = function() {}; -/** @type {?} */ -RecordBatch.prototype.numCols; -/** @type {?} */ -RecordBatch.prototype.length; -/** @type {?} */ -RecordBatch.prototype.schema; -/** @type {?} */ -RecordBatch.prototype.columns; -/** @type {?} */ -RecordBatch.prototype.select; - -var Vector = function() {}; -/** @type {?} */ -Vector.create = function() {}; -/** @type {?} */ -Vector.prototype.data; -/** @type {?} */ -Vector.prototype.type; -/** @type {?} */ -Vector.prototype.length; -/** @type {?} */ -Vector.prototype.nullCount; -/** @type {?} */ -Vector.prototype.nullBitmap; -/** @type {?} */ -Vector.prototype.isValid; -/** @type {?} */ -Vector.prototype.get; -/** @type {?} */ -Vector.prototype.set; -/** @type {?} */ 
-Vector.prototype.toArray; -/** @type {?} */ -Vector.prototype.concat; -/** @type {?} */ -Vector.prototype.slice; -/** @type {?} */ -Vector.prototype.acceptTypeVisitor; - -var BaseInt64 = function() {}; -/** @type {?} */ -BaseInt64.prototype.lessThan; -/** @type {?} */ -BaseInt64.prototype.equals; -/** @type {?} */ -BaseInt64.prototype.greaterThan; -/** @type {?} */ -BaseInt64.prototype.hex; - -var Uint64 = function() {}; -/** @type {?} */ -Uint64.add = function() {}; -/** @type {?} */ -Uint64.multiply = function() {}; -/** @type {?} */ -Uint64.from = function() {}; -/** @type {?} */ -Uint64.fromNumber = function() {}; -/** @type {?} */ -Uint64.fromString = function() {}; -/** @type {?} */ -Uint64.prototype.times; -/** @type {?} */ -Uint64.prototype.plus - -var Int64 = function() {}; -/** @type {?} */ -Int64.add = function() {}; -/** @type {?} */ -Int64.multiply = function() {}; -/** @type {?} */ -Int64.from = function() {}; -/** @type {?} */ -Int64.fromNumber = function() {}; -/** @type {?} */ -Int64.fromString = function() {}; -/** @type {?} */ -Int64.prototype.negate -/** @type {?} */ -Int64.prototype.times -/** @type {?} */ -Int64.prototype.plus -/** @type {?} */ -Int64.prototype.lessThan - -var Int128 = function() {}; -/** @type {?} */ -Int128.add = function() {}; -/** @type {?} */ -Int128.multiply = function() {}; -/** @type {?} */ -Int128.from = function() {}; -/** @type {?} */ -Int128.fromNumber = function() {}; -/** @type {?} */ -Int128.fromString = function() {}; -/** @type {?} */ -Int128.prototype.negate -/** @type {?} */ -Int128.prototype.times -/** @type {?} */ -Int128.prototype.plus -/** @type {?} */ -Int128.prototype.hex - -var packBools = function() {}; - -var Type = function() {}; -/** @type {?} */ -Type.NONE = function() {}; -/** @type {?} */ -Type.Null = function() {}; -/** @type {?} */ -Type.Int = function() {}; -/** @type {?} */ -Type.Float = function() {}; -/** @type {?} */ -Type.FloatingPoint = function() {}; -/** @type {?} */ -Type.Binary = function() {}; -/** @type {?} */ -Type.Utf8 = function() {}; -/** @type {?} */ -Type.Bool = function() {}; -/** @type {?} */ -Type.Decimal = function() {}; -/** @type {?} */ -Type.Date = function() {}; -/** @type {?} */ -Type.Time = function() {}; -/** @type {?} */ -Type.Timestamp = function() {}; -/** @type {?} */ -Type.Interval = function() {}; -/** @type {?} */ -Type.List = function() {}; -/** @type {?} */ -Type.Struct = function() {}; -/** @type {?} */ -Type.Struct_ = function() {}; -/** @type {?} */ -Type.Union = function() {}; -/** @type {?} */ -Type.FixedSizeBinary = function() {}; -/** @type {?} */ -Type.FixedSizeList = function() {}; -/** @type {?} */ -Type.Map = function() {}; -/** @type {?} */ -Type.Dictionary = function() {}; -/** @type {?} */ -Type.DenseUnion = function() {}; -/** @type {?} */ -Type.SparseUnion = function() {}; - -var DateUnit = function() {}; -/** @type {?} */ -DateUnit.DAY = function() {}; -/** @type {?} */ -DateUnit.MILLISECOND = function() {}; -var TimeUnit = function() {}; -/** @type {?} */ -TimeUnit.SECOND = function() {}; -/** @type {?} */ -TimeUnit.MILLISECOND = function() {}; -/** @type {?} */ -TimeUnit.MICROSECOND = function() {}; -/** @type {?} */ -TimeUnit.NANOSECOND = function() {}; -var Precision = function() {}; -/** @type {?} */ -Precision.HALF = function() {}; -/** @type {?} */ -Precision.SINGLE = function() {}; -/** @type {?} */ -Precision.DOUBLE = function() {}; -var UnionMode = function() {}; -/** @type {?} */ -UnionMode.Sparse = function() {}; -/** @type {?} */ -UnionMode.Dense = 
function() {}; -var VectorType = function() {}; -/** @type {?} */ -VectorType.OFFSET = function() {}; -/** @type {?} */ -VectorType.DATA = function() {}; -/** @type {?} */ -VectorType.VALIDITY = function() {}; -/** @type {?} */ -VectorType.TYPE = function() {}; -var IntervalUnit = function() {}; -/** @type {?} */ -IntervalUnit.YEAR_MONTH = function() {}; -/** @type {?} */ -IntervalUnit.DAY_TIME = function() {}; -var MessageHeader = function() {}; -/** @type {?} */ -MessageHeader.NONE = function() {}; -/** @type {?} */ -MessageHeader.Schema = function() {}; -/** @type {?} */ -MessageHeader.DictionaryBatch = function() {}; -/** @type {?} */ -MessageHeader.RecordBatch = function() {}; -/** @type {?} */ -MessageHeader.Tensor = function() {}; -var MetadataVersion = function() {}; -/** @type {?} */ -MetadataVersion.V1 = function() {}; -/** @type {?} */ -MetadataVersion.V2 = function() {}; -/** @type {?} */ -MetadataVersion.V3 = function() {}; -/** @type {?} */ -MetadataVersion.V4 = function() {}; - -var DataType = function() {}; -/** @type {?} */ -DataType.isNull = function() {}; -/** @type {?} */ -DataType.isInt = function() {}; -/** @type {?} */ -DataType.isFloat = function() {}; -/** @type {?} */ -DataType.isBinary = function() {}; -/** @type {?} */ -DataType.isUtf8 = function() {}; -/** @type {?} */ -DataType.isBool = function() {}; -/** @type {?} */ -DataType.isDecimal = function() {}; -/** @type {?} */ -DataType.isDate = function() {}; -/** @type {?} */ -DataType.isTime = function() {}; -/** @type {?} */ -DataType.isTimestamp = function() {}; -/** @type {?} */ -DataType.isInterval = function() {}; -/** @type {?} */ -DataType.isList = function() {}; -/** @type {?} */ -DataType.isStruct = function() {}; -/** @type {?} */ -DataType.isUnion = function() {}; -/** @type {?} */ -DataType.isDenseUnion = function() {}; -/** @type {?} */ -DataType.isSparseUnion = function() {}; -/** @type {?} */ -DataType.isFixedSizeBinary = function() {}; -/** @type {?} */ -DataType.isFixedSizeList = function() {}; -/** @type {?} */ -DataType.isMap = function() {}; -/** @type {?} */ -DataType.isDictionary = function() {}; -/** @type {?} */ -DataType.prototype.ArrayType; - -var Schema = function() {}; -/** @type {?} */ -Schema.from = function() {}; -/** @type {?} */ -Schema.prototype.fields; -/** @type {?} */ -Schema.prototype.version; -/** @type {?} */ -Schema.prototype.metadata; -/** @type {?} */ -Schema.prototype.dictionaries; -/** @type {?} */ -Schema.prototype.select; -var Field = function() {}; -/** @type {?} */ -Field.prototype.name; -/** @type {?} */ -Field.prototype.type; -/** @type {?} */ -Field.prototype.nullable; -/** @type {?} */ -Field.prototype.metadata; -var Null = function() {}; -var Int8 = function() {}; -var Int16 = function() {}; -var Int32 = function() {}; -var Int64 = function() {}; -var Uint8 = function() {}; -var Uint16 = function() {}; -var Uint32 = function() {}; -var Uint64 = function() {}; -var Float16 = function() {}; -var Float32 = function() {}; -var Float64 = function() {}; -var Binary = function() {}; -var Utf8 = function() {}; -var Bool = function() {}; -var Decimal = function() {}; -var Date_ = function() {}; -var Time = function() {}; -var Timestamp = function() {}; -var Interval = function() {}; -var List = function() {}; -var Struct = function() {}; -var Union = function() {}; -var DenseUnion = function() {}; -var SparseUnion = function() {}; -var FixedSizeBinary = function() {}; -var FixedSizeList = function() {}; -var Map_ = function() {}; -var Dictionary = function() {}; - 
-var BaseData = function() {}; -/** @type {?} */ -BaseData.prototype.type; -/** @type {?} */ -BaseData.prototype.clone; -/** @type {?} */ -BaseData.prototype.slice; -/** @type {?} */ -BaseData.prototype.length; -/** @type {?} */ -BaseData.prototype.offset; -/** @type {?} */ -BaseData.prototype.typeId; -/** @type {?} */ -BaseData.prototype.childData; -/** @type {?} */ -BaseData.prototype.nullBitmap; -/** @type {?} */ -BaseData.prototype.nullCount; - -var BoolData = function() {}; -var NestedData = function() {}; -var SparseUnionData = function() {}; -var ChunkedData = function() {}; - -var FlatData = function() {}; -/** @type {?} */ -FlatData.prototype.values; - -var FlatListData = function() {}; -/** @type {?} */ -FlatListData.prototype.values; -/** @type {?} */ -FlatListData.prototype.valueOffsets; - -var DictionaryData = function() {}; -/** @type {?} */ -DictionaryData.prototype.indices; -/** @type {?} */ -DictionaryData.prototype.dictionary; - -var ListData = function() {}; -/** @type {?} */ -ListData.prototype.values; -/** @type {?} */ -ListData.prototype.valueOffsets; - -var UnionData = function() {}; -/** @type {?} */ -UnionData.prototype.typeIds; - -var DenseUnionData = function() {}; -/** @type {?} */ -DenseUnionData.prototype.valueOffsets; - -var ChunkedData = function() {}; -/** @type {?} */ -ChunkedData.computeOffsets = function() {}; - -var FlatVector = function() {}; -/** @type {?} */ -FlatVector.prototype.values; -/** @type {?} */ -FlatVector.prototype.lows; -/** @type {?} */ -FlatVector.prototype.highs; -/** @type {?} */ -FlatVector.prototype.asInt32; - -var ListVectorBase = function() {}; -/** @type {?} */ -ListVectorBase.prototype.values; -/** @type {?} */ -ListVectorBase.prototype.valueOffsets; -/** @type {?} */ -ListVectorBase.prototype.getValueOffset; -/** @type {?} */ -ListVectorBase.prototype.getValueLength; - -var NestedVector = function() {}; -/** @type {?} */ -NestedVector.prototype.childData; -/** @type {?} */ -NestedVector.prototype.getChildAt; - -var NullVector = function() {}; -var BoolVector = function() {}; -/** @type {?} */ -BoolVector.from = function() {}; -/** @type {?} */ -BoolVector.prototype.values; -var IntVector = function() {}; -/** @type {?} */ -IntVector.from = function() {}; - -var FloatVector = function() {}; -/** @type {?} */ -FloatVector.from = function() {}; - -var DateVector = function() {}; -/** @type {?} */ -DateVector.from = function() {}; -/** @type {?} */ -DateVector.prototype.asEpochMilliseconds; -var DecimalVector = function() {}; -var TimeVector = function() {}; -var TimestampVector = function() {}; -/** @type {?} */ -TimestampVector.prototype.asEpochMilliseconds; -var IntervalVector = function() {}; -var BinaryVector = function() {}; -/** @type {?} */ -BinaryVector.prototype.asUtf8; -var FixedSizeBinaryVector = function() {}; -var Utf8Vector = function() {}; -/** @type {?} */ -Utf8Vector.prototype.asBinary; -var ListVector = function() {}; -/** @type {?} */ -ListVector.prototype.getChildAt; -var FixedSizeListVector = function() {}; -/** @type {?} */ -FixedSizeListVector.prototype.getChildAt; -var MapVector = function() {}; -/** @type {?} */ -MapVector.prototype.asStruct; -var StructVector = function() {}; -/** @type {?} */ -StructVector.prototype.asMap; -var UnionVector = function() {}; - -var DictionaryVector = function() {}; -/** @type {?} */ -DictionaryVector.prototype.indices; -/** @type {?} */ -DictionaryVector.prototype.dictionary; -/** @type {?} */ -DictionaryVector.prototype.getKey; -/** @type {?} */ 
-DictionaryVector.prototype.getValue; -/** @type {?} */ -DictionaryVector.prototype.reverseLookup; - -var FlatView = function() {}; -/** @type {?} */ -FlatView.prototype.get; -/** @type {?} */ -FlatView.prototype.clone; -/** @type {?} */ -FlatView.prototype.isValid; -/** @type {?} */ -FlatView.prototype.toArray; -/** @type {?} */ -FlatView.prototype.set; - -var PrimitiveView = function() {}; -/** @type {?} */ -PrimitiveView.prototype.size; -/** @type {?} */ -PrimitiveView.prototype.clone; - -var NullView = function() {}; -/** @type {?} */ -NullView.prototype.get; -/** @type {?} */ -NullView.prototype.clone; -/** @type {?} */ -NullView.prototype.isValid; -/** @type {?} */ -NullView.prototype.toArray; -/** @type {?} */ -NullView.prototype.set; - -var BoolView = function() {}; -/** @type {?} */ -BoolView.prototype.get; -/** @type {?} */ -BoolView.prototype.clone; -/** @type {?} */ -BoolView.prototype.isValid; -/** @type {?} */ -BoolView.prototype.toArray; -/** @type {?} */ -BoolView.prototype.set; - -var ValidityView = function() {}; -/** @type {?} */ -ValidityView.prototype.get; -/** @type {?} */ -ValidityView.prototype.clone; -/** @type {?} */ -ValidityView.prototype.isValid; -/** @type {?} */ -ValidityView.prototype.toArray; -/** @type {?} */ -ValidityView.prototype.set; -/** @type {?} */ -ValidityView.prototype.size; -/** @type {?} */ -ValidityView.prototype.getChildAt; - -var DictionaryView = function() {}; -/** @type {?} */ -DictionaryView.prototype.get; -/** @type {?} */ -DictionaryView.prototype.clone; -/** @type {?} */ -DictionaryView.prototype.isValid; -/** @type {?} */ -DictionaryView.prototype.toArray; -/** @type {?} */ -DictionaryView.prototype.set; - -var ListViewBase = function() {}; -/** @type {?} */ -ListViewBase.prototype.get; -/** @type {?} */ -ListViewBase.prototype.clone; -/** @type {?} */ -ListViewBase.prototype.isValid; -/** @type {?} */ -ListViewBase.prototype.toArray; -/** @type {?} */ -ListViewBase.prototype.set; - -var NestedView = function() {}; -/** @type {?} */ -NestedView.prototype.get; -/** @type {?} */ -NestedView.prototype.clone; -/** @type {?} */ -NestedView.prototype.isValid; -/** @type {?} */ -NestedView.prototype.toArray; -/** @type {?} */ -NestedView.prototype.set; - -var ChunkedView = function() {}; -/** @type {?} */ -ChunkedView.prototype.get; -/** @type {?} */ -ChunkedView.prototype.clone; -/** @type {?} */ -ChunkedView.prototype.isValid; -/** @type {?} */ -ChunkedView.prototype.toArray; -/** @type {?} */ -ChunkedView.prototype.set; - -var ListView = function() {}; -var FixedSizeListView = function() {}; -var BinaryView = function() {}; -var Utf8View = function() {}; -var UnionView = function() {}; -var DenseUnionView = function() {}; -var StructView = function() {}; -var MapView = function() {}; -var NullView = function() {}; -var FixedSizeView = function() {}; -var Float16View = function() {}; -var DateDayView = function() {}; -var DateMillisecondView = function() {}; -var TimestampDayView = function() {}; -var TimestampSecondView = function() {}; -var TimestampMillisecondView = function() {}; -var TimestampMicrosecondView = function() {}; -var TimestampNanosecondView = function() {}; -var IntervalYearMonthView = function() {}; -var IntervalYearView = function() {}; -var IntervalMonthView = function() {}; - -var TypeVisitor = function() {}; -/** @type {?} */ -TypeVisitor.visitTypeInline = function() {}; -/** @type {?} */ -TypeVisitor.prototype.visit; -/** @type {?} */ -TypeVisitor.prototype.visitMany; -/** @type {?} */ 
-TypeVisitor.prototype.visitNull; -/** @type {?} */ -TypeVisitor.prototype.visitBool; -/** @type {?} */ -TypeVisitor.prototype.visitInt; -/** @type {?} */ -TypeVisitor.prototype.visitFloat; -/** @type {?} */ -TypeVisitor.prototype.visitUtf8; -/** @type {?} */ -TypeVisitor.prototype.visitBinary; -/** @type {?} */ -TypeVisitor.prototype.visitFixedSizeBinary; -/** @type {?} */ -TypeVisitor.prototype.visitDate; -/** @type {?} */ -TypeVisitor.prototype.visitTimestamp; -/** @type {?} */ -TypeVisitor.prototype.visitTime; -/** @type {?} */ -TypeVisitor.prototype.visitDecimal; -/** @type {?} */ -TypeVisitor.prototype.visitList; -/** @type {?} */ -TypeVisitor.prototype.visitStruct; -/** @type {?} */ -TypeVisitor.prototype.visitUnion; -/** @type {?} */ -TypeVisitor.prototype.visitDictionary; -/** @type {?} */ -TypeVisitor.prototype.visitInterval; -/** @type {?} */ -TypeVisitor.prototype.visitFixedSizeList; -/** @type {?} */ -TypeVisitor.prototype.visitMap; - -var VectorVisitor = function() {}; -/** @type {?} */ -VectorVisitor.visitTypeInline = function() {}; -/** @type {?} */ -VectorVisitor.prototype.visit; -/** @type {?} */ -VectorVisitor.prototype.visitMany; -/** @type {?} */ -VectorVisitor.prototype.visitNull; -/** @type {?} */ -VectorVisitor.prototype.visitBool; -/** @type {?} */ -VectorVisitor.prototype.visitInt; -/** @type {?} */ -VectorVisitor.prototype.visitFloat; -/** @type {?} */ -VectorVisitor.prototype.visitUtf8; -/** @type {?} */ -VectorVisitor.prototype.visitBinary; -/** @type {?} */ -VectorVisitor.prototype.visitFixedSizeBinary; -/** @type {?} */ -VectorVisitor.prototype.visitDate; -/** @type {?} */ -VectorVisitor.prototype.visitTimestamp; -/** @type {?} */ -VectorVisitor.prototype.visitTime; -/** @type {?} */ -VectorVisitor.prototype.visitDecimal; -/** @type {?} */ -VectorVisitor.prototype.visitList; -/** @type {?} */ -VectorVisitor.prototype.visitStruct; -/** @type {?} */ -VectorVisitor.prototype.visitUnion; -/** @type {?} */ -VectorVisitor.prototype.visitDictionary; -/** @type {?} */ -VectorVisitor.prototype.visitInterval; -/** @type {?} */ -VectorVisitor.prototype.visitFixedSizeList; -/** @type {?} */ -VectorVisitor.prototype.visitMap; diff --git a/js/src/Arrow.node.ts b/js/src/Arrow.node.ts new file mode 100644 index 0000000000000..da6e3df6d9b08 --- /dev/null +++ b/js/src/Arrow.node.ts @@ -0,0 +1,29 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
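+
+// Node entry point: layers the Node stream adapter and the `throughNode`
+// transform factories on top of the DOM build, then re-exports everything from
+// './Arrow.dom'. A hedged usage sketch (the file name is illustrative):
+//
+//   import * as fs from 'fs';
+//   import { RecordBatchReader } from './Arrow.node';
+//   const reader = await RecordBatchReader.from(fs.createReadStream('data.arrow'));
+//   for await (const batch of reader) { /* consume each RecordBatch */ }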
+ +import streamAdapters from './io/adapters'; +import { RecordBatchReader } from './ipc/reader'; +import { RecordBatchWriter } from './ipc/writer'; +import { toNodeStream } from './ipc/node/iterable'; +import { recordBatchReaderThroughNodeStream } from './ipc/node/reader'; +import { recordBatchWriterThroughNodeStream } from './ipc/node/writer'; + +streamAdapters.toNodeStream = toNodeStream; +RecordBatchReader['throughNode'] = recordBatchReaderThroughNodeStream; +RecordBatchWriter['throughNode'] = recordBatchWriterThroughNodeStream; + +export * from './Arrow.dom'; diff --git a/js/src/Arrow.ts b/js/src/Arrow.ts index b1f4a3a4fa9bd..691a8bb42b73c 100644 --- a/js/src/Arrow.ts +++ b/js/src/Arrow.ts @@ -15,305 +15,79 @@ // specific language governing permissions and limitations // under the License. -import * as type_ from './type'; -import * as data_ from './data'; -import * as vector_ from './vector'; +export { ArrowType, DateUnit, IntervalUnit, MessageHeader, MetadataVersion, Precision, TimeUnit, Type, UnionMode, VectorType } from './enum'; +export { Data } from './data'; +export { + DataType, + Null, + Bool, + Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64, + Float, Float16, Float32, Float64, + Utf8, + Binary, + FixedSizeBinary, + Date_, DateDay, DateMillisecond, + Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond, + Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond, + Decimal, + List, + Struct, + Union, DenseUnion, SparseUnion, + Dictionary, + Interval, IntervalDayTime, IntervalYearMonth, + FixedSizeList, + Map_, +} from './type'; + +export { Table } from './table'; +export { Column } from './column'; +export { Schema, Field } from './schema'; +export { Visitor } from './visitor'; +export { + Row, + Vector, + BaseVector, + BinaryVector, + BoolVector, + Chunked, + DateVector, DateDayVector, DateMillisecondVector, + DecimalVector, + DictionaryVector, + FixedSizeBinaryVector, + FixedSizeListVector, + FloatVector, Float16Vector, Float32Vector, Float64Vector, + IntervalVector, IntervalDayTimeVector, IntervalYearMonthVector, + IntVector, Int8Vector, Int16Vector, Int32Vector, Int64Vector, Uint8Vector, Uint16Vector, Uint32Vector, Uint64Vector, + ListVector, + MapVector, + NullVector, + StructVector, + TimestampVector, TimestampSecondVector, TimestampMillisecondVector, TimestampMicrosecondVector, TimestampNanosecondVector, + TimeVector, TimeSecondVector, TimeMillisecondVector, TimeMicrosecondVector, TimeNanosecondVector, + UnionVector, DenseUnionVector, SparseUnionVector, + Utf8Vector, +} from './vector/index'; + +export { ByteStream, AsyncByteStream, AsyncByteQueue, ReadableSource, WritableSink } from './io/stream'; +export { RecordBatchReader, RecordBatchFileReader, RecordBatchStreamReader, AsyncRecordBatchFileReader, AsyncRecordBatchStreamReader } from './ipc/reader'; +export { RecordBatchWriter, RecordBatchFileWriter, RecordBatchStreamWriter, RecordBatchJSONWriter } from './ipc/writer'; +export { MessageReader, AsyncMessageReader, JSONMessageReader } from './ipc/message'; +export { Message } from './ipc/metadata/message'; +export { RecordBatch } from './recordbatch'; +export { ArrowJSONLike, FileHandle, Readable, Writable, ReadableWritable, ReadableDOMStreamOptions } from './io/interfaces'; +export { DataFrame, FilteredDataFrame, CountByResult, BindFunc, NextFunc } from './compute/dataframe'; + import * as util_int_ from './util/int'; import * as util_bit_ from './util/bit'; -import * as util_node from './util/node'; 
-import * as visitor_ from './visitor'; -import * as view_ from './vector/view'; -import * as predicate_ from './predicate'; -import { Vector } from './vector'; -import { RecordBatch } from './recordbatch'; -import { Schema, Field, Type } from './type'; -import { Table, DataFrame, NextFunc, BindFunc, CountByResult } from './table'; -import { fromReadableStream } from './ipc/reader/node'; -import { read, readAsync, readStream } from './ipc/reader/arrow'; -import { readBuffersAsync, readRecordBatchesAsync } from './ipc/reader/arrow'; -import { serializeFile, serializeStream } from './ipc/writer/binary'; - -export import View = vector_.View; -export import VectorLike = vector_.VectorLike; -export import TypedArray = type_.TypedArray; -export import IntBitWidth = type_.IntBitWidth; -export import TimeBitWidth = type_.TimeBitWidth; -export import TypedArrayConstructor = type_.TypedArrayConstructor; - -export { fromReadableStream }; -export { read, readAsync, readStream }; -export { readBuffersAsync, readRecordBatchesAsync }; -export { serializeFile, serializeStream }; -export { Table, DataFrame, NextFunc, BindFunc, CountByResult }; -export { Field, Schema, RecordBatch, Vector, Type }; - -export namespace util { - export import Uint64 = util_int_.Uint64; - export import Int64 = util_int_.Int64; - export import Int128 = util_int_.Int128; - export import packBools = util_bit_.packBools; - export import PipeIterator = util_node.PipeIterator; - export import AsyncPipeIterator = util_node.AsyncPipeIterator; -} - -export namespace data { - export import BaseData = data_.BaseData; - export import FlatData = data_.FlatData; - export import BoolData = data_.BoolData; - export import FlatListData = data_.FlatListData; - export import DictionaryData = data_.DictionaryData; - export import NestedData = data_.NestedData; - export import ListData = data_.ListData; - export import UnionData = data_.UnionData; - export import SparseUnionData = data_.SparseUnionData; - export import DenseUnionData = data_.DenseUnionData; - export import ChunkedData = data_.ChunkedData; -} - -export namespace enum_ { - export import Type = type_.ArrowType; - export import DateUnit = type_.DateUnit; - export import TimeUnit = type_.TimeUnit; - export import Precision = type_.Precision; - export import UnionMode = type_.UnionMode; - export import VectorType = type_.VectorType; - export import IntervalUnit = type_.IntervalUnit; - export import MessageHeader = type_.MessageHeader; - export import MetadataVersion = type_.MetadataVersion; -} - -export namespace type { - export import Schema = type_.Schema; - export import Field = type_.Field; - export import Null = type_.Null; - export import Int = type_.Int; - export import Int8 = type_.Int8; - export import Int16 = type_.Int16; - export import Int32 = type_.Int32; - export import Int64 = type_.Int64; - export import Uint8 = type_.Uint8; - export import Uint16 = type_.Uint16; - export import Uint32 = type_.Uint32; - export import Uint64 = type_.Uint64; - export import Float = type_.Float; - export import Float16 = type_.Float16; - export import Float32 = type_.Float32; - export import Float64 = type_.Float64; - export import Binary = type_.Binary; - export import Utf8 = type_.Utf8; - export import Bool = type_.Bool; - export import Decimal = type_.Decimal; - export import Date_ = type_.Date_; - export import Time = type_.Time; - export import Timestamp = type_.Timestamp; - export import Interval = type_.Interval; - export import List = type_.List; - export import Struct = 
type_.Struct; - export import Union = type_.Union; - export import DenseUnion = type_.DenseUnion; - export import SparseUnion = type_.SparseUnion; - export import FixedSizeBinary = type_.FixedSizeBinary; - export import FixedSizeList = type_.FixedSizeList; - export import Map_ = type_.Map_; - export import Dictionary = type_.Dictionary; -} - -export namespace vector { - export import Vector = vector_.Vector; - export import NullVector = vector_.NullVector; - export import BoolVector = vector_.BoolVector; - export import IntVector = vector_.IntVector; - export import FloatVector = vector_.FloatVector; - export import DateVector = vector_.DateVector; - export import DecimalVector = vector_.DecimalVector; - export import TimeVector = vector_.TimeVector; - export import TimestampVector = vector_.TimestampVector; - export import IntervalVector = vector_.IntervalVector; - export import BinaryVector = vector_.BinaryVector; - export import FixedSizeBinaryVector = vector_.FixedSizeBinaryVector; - export import Utf8Vector = vector_.Utf8Vector; - export import ListVector = vector_.ListVector; - export import FixedSizeListVector = vector_.FixedSizeListVector; - export import MapVector = vector_.MapVector; - export import StructVector = vector_.StructVector; - export import UnionVector = vector_.UnionVector; - export import DictionaryVector = vector_.DictionaryVector; -} - -export namespace visitor { - export import TypeVisitor = visitor_.TypeVisitor; - export import VectorVisitor = visitor_.VectorVisitor; -} - -export namespace view { - export import ChunkedView = view_.ChunkedView; - export import DictionaryView = view_.DictionaryView; - export import ListView = view_.ListView; - export import FixedSizeListView = view_.FixedSizeListView; - export import BinaryView = view_.BinaryView; - export import Utf8View = view_.Utf8View; - export import UnionView = view_.UnionView; - export import DenseUnionView = view_.DenseUnionView; - export import NestedView = view_.NestedView; - export import StructView = view_.StructView; - export import MapView = view_.MapView; - export import FlatView = view_.FlatView; - export import NullView = view_.NullView; - export import BoolView = view_.BoolView; - export import ValidityView = view_.ValidityView; - export import PrimitiveView = view_.PrimitiveView; - export import FixedSizeView = view_.FixedSizeView; - export import Float16View = view_.Float16View; - export import DateDayView = view_.DateDayView; - export import DateMillisecondView = view_.DateMillisecondView; - export import TimestampDayView = view_.TimestampDayView; - export import TimestampSecondView = view_.TimestampSecondView; - export import TimestampMillisecondView = view_.TimestampMillisecondView; - export import TimestampMicrosecondView = view_.TimestampMicrosecondView; - export import TimestampNanosecondView = view_.TimestampNanosecondView; - export import IntervalYearMonthView = view_.IntervalYearMonthView; - export import IntervalYearView = view_.IntervalYearView; - export import IntervalMonthView = view_.IntervalMonthView; -} - -export namespace predicate { - export import col = predicate_.col; - export import lit = predicate_.lit; - export import and = predicate_.and; - export import or = predicate_.or; - export import custom = predicate_.custom; - - export import Or = predicate_.Or; - export import Col = predicate_.Col; - export import And = predicate_.And; - export import Not = predicate_.Not; - export import GTeq = predicate_.GTeq; - export import LTeq = predicate_.LTeq; - export import Value = 
predicate_.Value; - export import Equals = predicate_.Equals; - export import Literal = predicate_.Literal; - export import Predicate = predicate_.Predicate; - - export import PredicateFunc = predicate_.PredicateFunc; -} - -/* These exports are needed for the closure and uglify umd targets */ -try { - let Arrow: any = eval('exports'); - if (Arrow && typeof Arrow === 'object') { - // string indexers tell closure and uglify not to rename these properties - Arrow['data'] = data; - Arrow['type'] = type; - Arrow['util'] = util; - Arrow['view'] = view; - Arrow['enum_'] = enum_; - Arrow['vector'] = vector; - Arrow['visitor'] = visitor; - Arrow['predicate'] = predicate; - - Arrow['read'] = read; - Arrow['readAsync'] = readAsync; - Arrow['readStream'] = readStream; - Arrow['fromReadableStream'] = fromReadableStream; - Arrow['readBuffersAsync'] = readBuffersAsync; - Arrow['readRecordBatchesAsync'] = readRecordBatchesAsync; - - Arrow['serializeFile'] = serializeFile; - Arrow['serializeStream'] = serializeStream; - - Arrow['Type'] = Type; - Arrow['Field'] = Field; - Arrow['Schema'] = Schema; - Arrow['Vector'] = Vector; - Arrow['RecordBatch'] = RecordBatch; - - Arrow['Table'] = Table; - Arrow['CountByResult'] = CountByResult; - } -} catch (e) { /* not the UMD bundle */ } -/* end umd exports */ - -// closure compiler erases static properties/methods: -// https://github.com/google/closure-compiler/issues/1776 -// set them via string indexers to save them from the mangler -Schema['from'] = Schema.from; -Table['from'] = Table.from; -Table['fromAsync'] = Table.fromAsync; -Table['fromStruct'] = Table.fromStruct; -Table['empty'] = Table.empty; -Vector['create'] = Vector.create; -RecordBatch['from'] = RecordBatch.from; - -util_int_.Uint64['add'] = util_int_.Uint64.add; -util_int_.Uint64['multiply'] = util_int_.Uint64.multiply; -util_int_.Uint64['from'] = util_int_.Uint64.from; -util_int_.Uint64['fromNumber'] = util_int_.Uint64.fromNumber; -util_int_.Uint64['fromString'] = util_int_.Uint64.fromString; -util_int_.Uint64['convertArray'] = util_int_.Uint64.convertArray; - -util_int_.Int64['add'] = util_int_.Int64.add; -util_int_.Int64['multiply'] = util_int_.Int64.multiply; -util_int_.Int64['from'] = util_int_.Int64.from; -util_int_.Int64['fromNumber'] = util_int_.Int64.fromNumber; -util_int_.Int64['fromString'] = util_int_.Int64.fromString; -util_int_.Int64['convertArray'] = util_int_.Int64.convertArray; - -util_int_.Int128['add'] = util_int_.Int128.add; -util_int_.Int128['multiply'] = util_int_.Int128.multiply; -util_int_.Int128['from'] = util_int_.Int128.from; -util_int_.Int128['fromNumber'] = util_int_.Int128.fromNumber; -util_int_.Int128['fromString'] = util_int_.Int128.fromString; -util_int_.Int128['convertArray'] = util_int_.Int128.convertArray; - -data_.ChunkedData['computeOffsets'] = data_.ChunkedData.computeOffsets; - -(type_.Type as any)['NONE'] = type_.Type.NONE; -(type_.Type as any)['Null'] = type_.Type.Null; -(type_.Type as any)['Int'] = type_.Type.Int; -(type_.Type as any)['Float'] = type_.Type.Float; -(type_.Type as any)['Binary'] = type_.Type.Binary; -(type_.Type as any)['Utf8'] = type_.Type.Utf8; -(type_.Type as any)['Bool'] = type_.Type.Bool; -(type_.Type as any)['Decimal'] = type_.Type.Decimal; -(type_.Type as any)['Date'] = type_.Type.Date; -(type_.Type as any)['Time'] = type_.Type.Time; -(type_.Type as any)['Timestamp'] = type_.Type.Timestamp; -(type_.Type as any)['Interval'] = type_.Type.Interval; -(type_.Type as any)['List'] = type_.Type.List; -(type_.Type as any)['Struct'] = 
type_.Type.Struct; -(type_.Type as any)['Union'] = type_.Type.Union; -(type_.Type as any)['FixedSizeBinary'] = type_.Type.FixedSizeBinary; -(type_.Type as any)['FixedSizeList'] = type_.Type.FixedSizeList; -(type_.Type as any)['Map'] = type_.Type.Map; -(type_.Type as any)['Dictionary'] = type_.Type.Dictionary; -(type_.Type as any)['DenseUnion'] = type_.Type.DenseUnion; -(type_.Type as any)['SparseUnion'] = type_.Type.SparseUnion; - -type_.DataType['isNull'] = type_.DataType.isNull; -type_.DataType['isInt'] = type_.DataType.isInt; -type_.DataType['isFloat'] = type_.DataType.isFloat; -type_.DataType['isBinary'] = type_.DataType.isBinary; -type_.DataType['isUtf8'] = type_.DataType.isUtf8; -type_.DataType['isBool'] = type_.DataType.isBool; -type_.DataType['isDecimal'] = type_.DataType.isDecimal; -type_.DataType['isDate'] = type_.DataType.isDate; -type_.DataType['isTime'] = type_.DataType.isTime; -type_.DataType['isTimestamp'] = type_.DataType.isTimestamp; -type_.DataType['isInterval'] = type_.DataType.isInterval; -type_.DataType['isList'] = type_.DataType.isList; -type_.DataType['isStruct'] = type_.DataType.isStruct; -type_.DataType['isUnion'] = type_.DataType.isUnion; -type_.DataType['isDenseUnion'] = type_.DataType.isDenseUnion; -type_.DataType['isSparseUnion'] = type_.DataType.isSparseUnion; -type_.DataType['isFixedSizeBinary'] = type_.DataType.isFixedSizeBinary; -type_.DataType['isFixedSizeList'] = type_.DataType.isFixedSizeList; -type_.DataType['isMap'] = type_.DataType.isMap; -type_.DataType['isDictionary'] = type_.DataType.isDictionary; - -vector_.BoolVector['from'] = vector_.BoolVector.from; -vector_.DateVector['from'] = vector_.DateVector.from; -vector_.IntVector['from'] = vector_.IntVector.from; -vector_.FloatVector['from'] = vector_.FloatVector.from; - -visitor_.TypeVisitor['visitTypeInline'] = visitor_.TypeVisitor.visitTypeInline; -visitor_.VectorVisitor['visitTypeInline'] = visitor_.VectorVisitor.visitTypeInline; \ No newline at end of file +import * as util_buffer_ from './util/buffer'; +import * as util_vector_ from './util/vector'; +import * as predicate from './compute/predicate'; + +export { predicate }; +/** @ignore */ +export const util = { + ...util_int_, + ...util_bit_, + ...util_buffer_, + ...util_vector_ +}; diff --git a/js/src/bin/arrow2csv.ts b/js/src/bin/arrow2csv.ts index 510f00740fed0..4ae9c0089a009 100644 --- a/js/src/bin/arrow2csv.ts +++ b/js/src/bin/arrow2csv.ts @@ -20,60 +20,189 @@ /* tslint:disable */ import * as fs from 'fs'; -import { promisify } from 'util'; -import { Table, readStream } from '../Arrow'; +import * as stream from 'stream'; +import { valueToString } from '../util/pretty'; +import { RecordBatch, RecordBatchReader, AsyncByteQueue } from '../Arrow.node'; -const readFile = promisify(fs.readFile); -const { parse } = require('json-bignum'); +const padLeft = require('pad-left'); +const bignumJSONParse = require('json-bignum').parse; +const pipeline = require('util').promisify(stream.pipeline); const argv = require(`command-line-args`)(cliOpts(), { partial: true }); -const files = [...(argv.file || []), ...(argv._unknown || [])].filter(Boolean); +const files = argv.help ? 
[] : [...(argv.file || []), ...(argv._unknown || [])].filter(Boolean); + +const state = { ...argv, closed: false, hasRecords: false }; (async () => { - let hasRecords = false; - if (files.length > 0) { - hasRecords = true; - for (let input of files) { - printTable(await readFile(input)); - } - } else { - let rowOffset = 0; - let maxColumnWidths: number[] = []; - for await (const recordBatch of readStream(process.stdin)) { - hasRecords = true; - recordBatch.rowsToString(' | ', rowOffset, maxColumnWidths).pipe(process.stdout); - rowOffset += recordBatch.length; + + const sources = argv.help ? [] : [ + ...files.map((file) => () => fs.createReadStream(file)), + ...(process.stdin.isTTY ? [] : [() => process.stdin]) + ].filter(Boolean) as (() => NodeJS.ReadableStream)[]; + + let reader: RecordBatchReader | null; + + for (const source of sources) { + if (state.closed) { break; } + if (reader = await createRecordBatchReader(source)) { + await pipeline( + reader.toNodeStream(), + recordBatchRowsToString(state), + process.stdout + ).catch(() => state.closed = true); } + if (state.closed) { break; } } - return hasRecords ? null : print_usage(); -})().catch((e) => { console.error(e); process.exit(1); }); -function printTable(input: any) { - let table: Table; + return state.hasRecords ? 0 : print_usage(); +})() +.then((x) => +x || 0, (err) => { + if (err) { + console.error(`${err && err.stack || err}`); + } + return process.exitCode || 1; +}).then((code) => process.exit(code)); + +async function createRecordBatchReader(createSourceStream: () => NodeJS.ReadableStream) { + + let json = new AsyncByteQueue(); + let stream = new AsyncByteQueue(); + let source = createSourceStream(); + let reader: RecordBatchReader | null = null; + // tee the input source, just in case it's JSON + source.on('end', () => [stream, json].forEach((y) => y.close())) + .on('data', (x) => [stream, json].forEach((y) => y.write(x))) + .on('error', (e) => [stream, json].forEach((y) => y.abort(e))); + try { - table = Table.from(input); - } catch (e) { - table = Table.from(parse(input + '')); + reader = await (await RecordBatchReader.from(stream)).open(); + } catch (e) { reader = null; } + + if (!reader || reader.closed) { + reader = null; + await json.closed; + if (source instanceof fs.ReadStream) { source.close(); } + // If the data in the `json` ByteQueue parses to JSON, then assume it's Arrow JSON from a file or stdin + try { + reader = await (await RecordBatchReader.from(bignumJSONParse(await json.toString()))).open(); + } catch (e) { reader = null; } + } + + return (reader && !reader.closed) ? reader : null; +} + +function recordBatchRowsToString(state: { closed: boolean, schema: any, separator: string, hasRecords: boolean }) { + + let rowId = 0, maxColWidths = [15], separator = `${state.separator || ' |'} `; + + return new stream.Transform({ transform, encoding: 'utf8', writableObjectMode: true, readableObjectMode: false }); + + function transform(this: stream.Transform, batch: RecordBatch, _enc: string, cb: (error?: Error, data?: any) => void) { + batch = !(state.schema && state.schema.length) ? 
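// if a --schema column list was passed, project the batch down to just those columns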
batch : batch.select(...state.schema); + if (batch.length <= 0 || batch.numCols <= 0 || state.closed) { + state.hasRecords || (state.hasRecords = false); + return cb(undefined, null); + } + + state.hasRecords = true; + const header = ['row_id', ...batch.schema.fields.map((f) => `${f}`)].map(valueToString); + + // Pass one to convert to strings and count max column widths + const newMaxWidths = measureColumnWidths(rowId, batch, header.map((x, i) => Math.max(maxColWidths[i] || 0, x.length))); + + // If any of the column widths changed, print the header again + if ((rowId % 350) && JSON.stringify(newMaxWidths) !== JSON.stringify(maxColWidths)) { + this.push(`\n${formatRow(header, newMaxWidths, separator)}`); + } + + maxColWidths = newMaxWidths; + + for (const row of batch) { + if (state.closed) { break; } + else if (!row) { continue; } + if (!(rowId % 350)) { this.push(`\n${formatRow(header, maxColWidths, separator)}`); } + this.push(formatRow([rowId++, ...row].map(valueToString), maxColWidths, separator)); + } + cb(); } - if (argv.schema && argv.schema.length) { - table = table.select(...argv.schema); +} + +function formatRow(row: string[] = [], maxColWidths: number[] = [], separator: string = ' |') { + return row.map((x, j) => padLeft(x, maxColWidths[j])).join(separator) + '\n'; +} + +function measureColumnWidths(rowId: number, batch: RecordBatch, maxColWidths: number[] = []) { + for (const row of batch) { + if (!row) { continue; } + maxColWidths[0] = Math.max(maxColWidths[0] || 0, (`${rowId++}`).length); + for (let val: any, j = -1, k = row.length; ++j < k;) { + if (ArrayBuffer.isView(val = row[j]) && (typeof val[Symbol.toPrimitive] !== 'function')) { + // If we're printing a column of TypedArrays, ensure the column is wide enough to accommodate + // the widest possible element for a given byte size, since JS omits leading zeroes. 
For example: + // 1 | [1137743649,2170567488,244696391,2122556476] + // 2 | null + // 3 | [637174007,2142281880,961736230,2912449282] + // 4 | [1035112265,21832886,412842672,2207710517] + // 5 | null + // 6 | null + // 7 | [2755142991,4192423256,2994359,467878370] + const elementWidth = typedArrayElementWidths.get(val.constructor)!; + + maxColWidths[j + 1] = Math.max(maxColWidths[j + 1] || 0, + 2 + // brackets on each end + (val.length - 1) + // commas between elements + (val.length * elementWidth) // width of stringified 2^N-1 + ); + } else { + maxColWidths[j + 1] = Math.max(maxColWidths[j + 1] || 0, valueToString(val).length); + } + } } - table.rowsToString().pipe(process.stdout); + return maxColWidths; } +// Measure the stringified representation of 2^N-1 for each TypedArray variant +const typedArrayElementWidths = (() => { + const maxElementWidth = (ArrayType: any) => { + const octets = Array.from({ length: ArrayType.BYTES_PER_ELEMENT - 1 }, _ => 255); + return `${new ArrayType(new Uint8Array([...octets, 254]).buffer)[0]}`.length; + }; + return new Map([ + [Int8Array, maxElementWidth(Int8Array)], + [Int16Array, maxElementWidth(Int16Array)], + [Int32Array, maxElementWidth(Int32Array)], + [Uint8Array, maxElementWidth(Uint8Array)], + [Uint16Array, maxElementWidth(Uint16Array)], + [Uint32Array, maxElementWidth(Uint32Array)], + [Float32Array, maxElementWidth(Float32Array)], + [Float64Array, maxElementWidth(Float64Array)], + [Uint8ClampedArray, maxElementWidth(Uint8ClampedArray)] + ]) +})(); + function cliOpts() { return [ { type: String, name: 'schema', alias: 's', optional: true, multiple: true, - typeLabel: '[underline]{columns}', + typeLabel: '{underline columns}', description: 'A space-delimited list of column names' }, { type: String, name: 'file', alias: 'f', - optional: false, multiple: true, + optional: true, multiple: true, description: 'The Arrow file to read' + }, + { + type: String, + name: 'sep', optional: true, default: '|', + description: 'The column separator character' + }, + { + type: Boolean, + name: 'help', optional: true, default: false, + description: 'Print this usage guide.' } ]; } @@ -87,34 +216,29 @@ function print_usage() { { header: 'Synopsis', content: [ - '$ arrow2csv [underline]{file.arrow} [[bold]{--schema} column_name ...]', - '$ arrow2csv [[bold]{--schema} column_name ...] [[bold]{--file} [underline]{file.arrow}]', - '$ arrow2csv [bold]{-s} column_1 [bold]{-s} column_2 [[bold]{-f} [underline]{file.arrow}]', - '$ arrow2csv [[bold]{--help}]' + '$ arrow2csv {underline file.arrow} [{bold --schema} column_name ...]', + '$ arrow2csv [{bold --schema} column_name ...] [{bold --file} {underline file.arrow}]', + '$ arrow2csv {bold -s} column_1 {bold -s} column_2 [{bold -f} {underline file.arrow}]', + '$ arrow2csv [{bold --help}]' ] }, { header: 'Options', - optionList: [ - ...cliOpts(), - { - name: 'help', - description: 'Print this usage guide.' 
- } - ] + optionList: cliOpts() }, { header: 'Example', content: [ - '$ arrow2csv --schema foo baz -f simple.arrow', - '> foo, baz', - '> 1, aa', - '> null, null', - '> 3, null', - '> 4, bbb', - '> 5, cccc', + '$ arrow2csv --schema foo baz -f simple.arrow --sep ","', + ' ', + '> "row_id", "foo: Int32", "bar: Float64", "baz: Utf8"', + '> 0, 1, 1, "aa"', + '> 1, null, null, null', + '> 2, 3, null, null', + '> 3, 4, 4, "bbb"', + '> 4, 5, 5, "cccc"', ] } ])); - process.exit(1); -} \ No newline at end of file + return 1; +} diff --git a/js/src/column.ts b/js/src/column.ts new file mode 100644 index 0000000000000..0a5bc36797bf9 --- /dev/null +++ b/js/src/column.ts @@ -0,0 +1,100 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import { Field } from './schema'; +import { Vector } from './vector'; +import { DataType } from './type'; +import { Clonable, Sliceable, Applicative } from './vector'; +import { Chunked, SearchContinuation } from './vector/chunked'; + +export interface Column { + typeId: T['TType']; + concat(...others: Vector[]): Column; + slice(begin?: number, end?: number): Column; + clone(chunks?: Vector[], offsets?: Uint32Array): Column; +} + +export class Column + extends Chunked + implements Clonable>, + Sliceable>, + Applicative> { + + constructor(field: Field, vectors: Vector[] = [], offsets?: Uint32Array) { + vectors = Chunked.flatten(...vectors); + super(field.type, vectors, offsets); + this._field = field; + if (vectors.length === 1 && !(this instanceof SingleChunkColumn)) { + return new SingleChunkColumn(field, vectors[0], this._chunkOffsets); + } + } + + protected _field: Field; + protected _children?: Column[]; + + public get field() { return this._field; } + public get name() { return this._field.name; } + + public clone(chunks = this._chunks) { + return new Column(this._field, chunks); + } + + public getChildAt(index: number): Column | null { + + if (index < 0 || index >= this.numChildren) { return null; } + + let columns = this._children || (this._children = []); + let column: Column, field: Field, chunks: Vector[]; + + if (column = columns[index]) { return column; } + if (field = ((this.type.children || [])[index] as Field)) { + chunks = this._chunks + .map((vector) => vector.getChildAt(index)) + .filter((vec): vec is Vector => vec != null); + if (chunks.length > 0) { + return (columns[index] = new Column(field, chunks)); + } + } + + return null; + } +} + +class SingleChunkColumn extends Column { + protected _chunk: Vector; + constructor(field: Field, vector: Vector, offsets?: Uint32Array) { + super(field, [vector], offsets); + this._chunk = vector; + } + public search(index: number): [number, number] | null; + public search>>(index: number, then?: N): ReturnType; + public search>>(index: number, then?: N) { + return 
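// a single chunk means every search resolves to chunk 0 at the given index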
then ? then(this, 0, index) : [0, index]; + } + public isValid(index: number): boolean { + return this._chunk.isValid(index); + } + public get(index: number): T['TValue'] | null { + return this._chunk.get(index); + } + public set(index: number, value: T['TValue'] | null): void { + this._chunk.set(index, value); + } + public indexOf(element: T['TValue'], offset?: number): number { + return this._chunk.indexOf(element, offset); + } +} diff --git a/js/src/compute/dataframe.ts b/js/src/compute/dataframe.ts new file mode 100644 index 0000000000000..01026d882f0c0 --- /dev/null +++ b/js/src/compute/dataframe.ts @@ -0,0 +1,209 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import { Table } from '../table'; +import { Vector } from '../vector'; +import { IntVector } from '../vector/int'; +import { Field, Schema } from '../schema'; +import { Vector as V } from '../interfaces'; +import { Predicate, Col } from './predicate'; +import { RecordBatch } from '../recordbatch'; +import { DataType, Int, Struct, Dictionary } from '../type'; + +/** @ignore */ +export type BindFunc = (batch: RecordBatch) => void; +/** @ignore */ +export type NextFunc = (idx: number, batch: RecordBatch) => void; + +Table.prototype.countBy = function(this: Table, name: Col | string) { return new DataFrame(this.chunks).countBy(name); }; +Table.prototype.scan = function(this: Table, next: NextFunc, bind?: BindFunc) { return new DataFrame(this.chunks).scan(next, bind); }; +Table.prototype.filter = function(this: Table, predicate: Predicate): FilteredDataFrame { return new DataFrame(this.chunks).filter(predicate); }; + +export class DataFrame extends Table { + public filter(predicate: Predicate): FilteredDataFrame { + return new FilteredDataFrame(this.chunks, predicate); + } + public scan(next: NextFunc, bind?: BindFunc) { + const batches = this.chunks, numBatches = batches.length; + for (let batchIndex = -1; ++batchIndex < numBatches;) { + // load batches + const batch = batches[batchIndex]; + if (bind) { bind(batch); } + // yield all indices + for (let index = -1, numRows = batch.length; ++index < numRows;) { + next(index, batch); + } + } + } + public countBy(name: Col | string) { + const batches = this.chunks, numBatches = batches.length; + const count_by = typeof name === 'string' ? 
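// accept either a bare column name or a prebuilt Col expression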
new Col(name) : name as Col;
+        // Assume that all dictionary batches are deltas, which means that the
+        // last record batch has the most complete dictionary
+        count_by.bind(batches[numBatches - 1]);
+        const vector = count_by.vector as V<Dictionary>;
+        if (!DataType.isDictionary(vector.type)) {
+            throw new Error('countBy currently only supports dictionary-encoded columns');
+        }
+
+        const countByteLength = Math.ceil(Math.log(vector.dictionary.length) / Math.log(256));
+        const CountsArrayType = countByteLength >= 3 ? Uint32Array :
+                                countByteLength >= 2 ? Uint16Array : Uint8Array;
+
+        const counts = new CountsArrayType(vector.dictionary.length);
+        for (let batchIndex = -1; ++batchIndex < numBatches;) {
+            // load batches
+            const batch = batches[batchIndex];
+            // rebind the countBy Col
+            count_by.bind(batch);
+            const keys = (count_by.vector as V<Dictionary>).indices;
+            // yield all indices
+            for (let index = -1, numRows = batch.length; ++index < numRows;) {
+                let key = keys.get(index);
+                if (key !== null) { counts[key]++; }
+            }
+        }
+        return new CountByResult(vector.dictionary, IntVector.from(counts));
+    }
+}
+
+export class CountByResult<T extends DataType = any, TCount extends Int = Int> extends Table<{ values: T, counts: TCount }> {
+    constructor(values: Vector<T>, counts: V<TCount>) {
+        const schema = new Schema<{ values: T, counts: TCount }>([
+            new Field('values', values.type),
+            new Field('counts', counts.type)
+        ]);
+        super(new RecordBatch(schema, counts.length, [values, counts]));
+    }
+    public toJSON(): Object {
+        const values = this.getColumnAt(0)!;
+        const counts = this.getColumnAt(1)!;
+        const result = {} as { [k: string]: number | null };
+        for (let i = -1; ++i < this.length;) {
+            result[values.get(i)] = counts.get(i);
+        }
+        return result;
+    }
+}
+
+export class FilteredDataFrame<T extends { [key: string]: DataType } = any> extends DataFrame<T> {
+    private _predicate: Predicate;
+    constructor (batches: RecordBatch<T>[], predicate: Predicate) {
+        super(batches);
+        this._predicate = predicate;
+    }
+    public scan(next: NextFunc, bind?: BindFunc) {
+        // inlined version of this:
+        // this.parent.scan((idx, columns) => {
+        //     if (this.predicate(idx, columns)) next(idx, columns);
+        // });
+        const batches = this._chunks;
+        const numBatches = batches.length;
+        for (let batchIndex = -1; ++batchIndex < numBatches;) {
+            // load batches
+            const batch = batches[batchIndex];
+            // TODO: bind batches lazily
+            // If predicate doesn't match anything in the batch we don't need
+            // to bind the callback
+            if (bind) { bind(batch); }
+            const predicate = this._predicate.bind(batch);
+            // yield all indices
+            for (let index = -1, numRows = batch.length; ++index < numRows;) {
+                if (predicate(index, batch)) { next(index, batch); }
+            }
+        }
+    }
+    public count(): number {
+        // inlined version of this:
+        // let sum = 0;
+        // this.parent.scan((idx, columns) => {
+        //     if (this.predicate(idx, columns)) ++sum;
+        // });
+        // return sum;
+        let sum = 0;
+        const batches = this._chunks;
+        const numBatches = batches.length;
+        for (let batchIndex = -1; ++batchIndex < numBatches;) {
+            // load batches
+            const batch = batches[batchIndex];
+            const predicate = this._predicate.bind(batch);
+            // yield all indices
+            for (let index = -1, numRows = batch.length; ++index < numRows;) {
+                if (predicate(index, batch)) { ++sum; }
+            }
+        }
+        return sum;
+    }
+    public *[Symbol.iterator](): IterableIterator<Struct<T>['TValue']> {
+        // inlined version of this:
+        // this.parent.scan((idx, columns) => {
+        //     if (this.predicate(idx, columns)) next(idx, columns);
+        // });
+        const batches = this._chunks;
+        const numBatches = batches.length;
+        for (let batchIndex = -1; ++batchIndex < numBatches;)
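// (the predicate is re-bound for each batch so Col references resolve against that batch's vectors)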
{
+            // load batches
+            const batch = batches[batchIndex];
+            // TODO: bind batches lazily
+            // If predicate doesn't match anything in the batch we don't need
+            // to bind the callback
+            const predicate = this._predicate.bind(batch);
+            // yield all indices
+            for (let index = -1, numRows = batch.length; ++index < numRows;) {
+                if (predicate(index, batch)) { yield batch.get(index) as any; }
+            }
+        }
+    }
+    public filter(predicate: Predicate): FilteredDataFrame<T> {
+        return new FilteredDataFrame<T>(
+            this._chunks,
+            this._predicate.and(predicate)
+        );
+    }
+    public countBy(name: Col | string) {
+        const batches = this._chunks, numBatches = batches.length;
+        const count_by = typeof name === 'string' ? new Col(name) : name as Col;
+        // Assume that all dictionary batches are deltas, which means that the
+        // last record batch has the most complete dictionary
+        count_by.bind(batches[numBatches - 1]);
+        const vector = count_by.vector as V<Dictionary>;
+        if (!DataType.isDictionary(vector.type)) {
+            throw new Error('countBy currently only supports dictionary-encoded columns');
+        }
+
+        const countByteLength = Math.ceil(Math.log(vector.dictionary.length) / Math.log(256));
+        const CountsArrayType = countByteLength >= 3 ? Uint32Array :
+                                countByteLength >= 2 ? Uint16Array : Uint8Array;
+
+        const counts = new CountsArrayType(vector.dictionary.length);
+
+        for (let batchIndex = -1; ++batchIndex < numBatches;) {
+            // load batches
+            const batch = batches[batchIndex];
+            const predicate = this._predicate.bind(batch);
+            // rebind the countBy Col
+            count_by.bind(batch);
+            const keys = (count_by.vector as V<Dictionary>).indices;
+            // yield all indices
+            for (let index = -1, numRows = batch.length; ++index < numRows;) {
+                let key = keys.get(index);
+                if (key !== null && predicate(index, batch)) { counts[key]++; }
+            }
+        }
+        return new CountByResult(vector.dictionary, IntVector.from(counts));
+    }
+}
diff --git a/js/src/predicate.ts b/js/src/compute/predicate.ts
similarity index 94%
rename from js/src/predicate.ts
rename to js/src/compute/predicate.ts
index cfae73ae0af73..ec947d2670c81 100644
--- a/js/src/predicate.ts
+++ b/js/src/compute/predicate.ts
@@ -15,12 +15,16 @@
 // specific language governing permissions and limitations
 // under the License.
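For context, a minimal sketch of how the relocated predicate combinators compose with the DataFrame methods added above. The 'apache-arrow' entry point, the `table` value, and its dictionary-encoded Utf8 column 'city' are illustrative assumptions, not part of this diff:

    import { Table, predicate } from 'apache-arrow';

    declare const table: Table;  // assumed: loaded elsewhere, with a dictionary-encoded 'city' column

    // Col re-binds per RecordBatch; Equals caches dictionary reverse-lookups between batches
    const isBoston = new predicate.Col('city').eq('Boston');

    // filter() only composes predicates; the chunks are walked when count()/scan()/countBy() run
    const matches = table.filter(isBoston).count();

    // countBy() requires a dictionary-encoded column (see the isDictionary guard above)
    const histogram = table.countBy('city').toJSON();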
-import { RecordBatch } from './recordbatch'; -import { Vector, DictionaryVector } from './vector'; +import { Vector } from '../vector'; +import { RecordBatch } from '../recordbatch'; +import { DictionaryVector } from '../vector/dictionary'; +/** @ignore */ export type ValueFunc = (idx: number, cols: RecordBatch) => T | null; +/** @ignore */ export type PredicateFunc = (idx: number, cols: RecordBatch) => boolean; +/** @ignore */ export abstract class Value { eq(other: Value | T): Predicate { if (!(other instanceof Value)) { other = new Literal(other); } @@ -45,10 +49,12 @@ export abstract class Value { } } +/** @ignore */ export class Literal extends Value { constructor(public v: T) { super(); } } +/** @ignore */ export class Col extends Value { // @ts-ignore public vector: Vector; @@ -56,7 +62,7 @@ export class Col extends Value { public colidx: number; constructor(public name: string) { super(); } - bind(batch: RecordBatch) { + bind(batch: RecordBatch): (idx: number, batch?: RecordBatch) => any { if (!this.colidx) { // Assume column index doesn't change between calls to bind //this.colidx = cols.findIndex(v => v.name.indexOf(this.name) != -1); @@ -70,11 +76,13 @@ export class Col extends Value { } if (this.colidx < 0) { throw new Error(`Failed to bind Col "${this.name}"`); } } - this.vector = batch.getChildAt(this.colidx)!; - return this.vector.get.bind(this.vector); + + const vec = this.vector = batch.getChildAt(this.colidx)!; + return (idx: number) => vec.get(idx); } } +/** @ignore */ export abstract class Predicate { abstract bind(batch: RecordBatch): PredicateFunc; and(...expr: Predicate[]): And { return new And(this, ...expr); } @@ -82,6 +90,7 @@ export abstract class Predicate { not(): Predicate { return new Not(this); } } +/** @ignore */ export abstract class ComparisonPredicate extends Predicate { constructor(public readonly left: Value, public readonly right: Value) { super(); @@ -110,8 +119,9 @@ export abstract class ComparisonPredicate extends Predicate { protected abstract _bindLitCol(batch: RecordBatch, lit: Literal, col: Col): PredicateFunc; } +/** @ignore */ export abstract class CombinationPredicate extends Predicate { - readonly children: Predicate[] + readonly children: Predicate[]; constructor(...children: Predicate[]) { super(); this.children = children; @@ -120,12 +130,13 @@ export abstract class CombinationPredicate extends Predicate { // add children to protoype so it doesn't get mangled in es2015/umd ( CombinationPredicate.prototype).children = Object.freeze([]); // freeze for safety +/** @ignore */ export class And extends CombinationPredicate { constructor(...children: Predicate[]) { // Flatten any Ands children = children.reduce((accum: Predicate[], p: Predicate): Predicate[] => { - return accum.concat(p instanceof And ? p.children : p) - }, []) + return accum.concat(p instanceof And ? p.children : p); + }, []); super(...children); } bind(batch: RecordBatch) { @@ -134,12 +145,13 @@ export class And extends CombinationPredicate { } } +/** @ignore */ export class Or extends CombinationPredicate { constructor(...children: Predicate[]) { // Flatten any Ors children = children.reduce((accum: Predicate[], p: Predicate): Predicate[] => { - return accum.concat(p instanceof Or ? p.children : p) - }, []) + return accum.concat(p instanceof Or ? 
p.children : p); + }, []); super(...children); } bind(batch: RecordBatch) { @@ -148,6 +160,7 @@ export class Or extends CombinationPredicate { } } +/** @ignore */ export class Equals extends ComparisonPredicate { // Helpers used to cache dictionary reverse lookups between calls to bind private lastDictionary: Vector|undefined; @@ -200,6 +213,7 @@ export class Equals extends ComparisonPredicate { } } +/** @ignore */ export class LTeq extends ComparisonPredicate { protected _bindLitLit(_batch: RecordBatch, left: Literal, right: Literal): PredicateFunc { const rtrn: boolean = left.v <= right.v; @@ -223,6 +237,7 @@ export class LTeq extends ComparisonPredicate { } } +/** @ignore */ export class GTeq extends ComparisonPredicate { protected _bindLitLit(_batch: RecordBatch, left: Literal, right: Literal): PredicateFunc { const rtrn: boolean = left.v >= right.v; @@ -246,6 +261,7 @@ export class GTeq extends ComparisonPredicate { } } +/** @ignore */ export class Not extends Predicate { constructor(public readonly child: Predicate) { super(); @@ -257,6 +273,7 @@ export class Not extends Predicate { } } +/** @ignore */ export class CustomPredicate extends Predicate { constructor(private next: PredicateFunc, private bind_: (batch: RecordBatch) => void) { super(); diff --git a/js/src/data.ts b/js/src/data.ts index 5a117594bc89e..b55321bf98ec2 100644 --- a/js/src/data.ts +++ b/js/src/data.ts @@ -15,317 +15,231 @@ // specific language governing permissions and limitations // under the License. +import { Vector } from './vector'; import { popcnt_bit_range } from './util/bit'; -import { VectorLike, Vector } from './vector'; -import { Int, Bool, FlatListType, List, Struct, Map_ } from './type'; -import { VectorType, TypedArray, TypedArrayConstructor, Dictionary } from './type'; -import { DataType, FlatType, ListType, NestedType, SingleNestedType, DenseUnion, SparseUnion } from './type'; +import { toArrayBufferView } from './util/buffer'; +import { DataType, SparseUnion, DenseUnion } from './type'; +import { VectorType as BufferType, UnionMode, Type } from './enum'; +import { + Dictionary, + Null, Int, Float, + Binary, Bool, Utf8, Decimal, + Date_, Time, Timestamp, Interval, + List, Struct, Union, FixedSizeBinary, FixedSizeList, Map_, +} from './type'; -export function toTypedArray(ArrayType: TypedArrayConstructor, values?: T | ArrayLike | Iterable | null): T { - if (!ArrayType && ArrayBuffer.isView(values)) { return values; } - return values instanceof ArrayType ? values - : !values || !ArrayBuffer.isView(values) ? 
ArrayType.from(values || []) - : new ArrayType(values.buffer, values.byteOffset, values.byteLength / ArrayType.BYTES_PER_ELEMENT); -} - -export type Data = DataTypes[T['TType']] & BaseData; -export interface DataTypes { -/* [Type.NONE]*/ 0: BaseData; -/* [Type.Null]*/ 1: FlatData; -/* [Type.Int]*/ 2: FlatData; -/* [Type.Float]*/ 3: FlatData; -/* [Type.Binary]*/ 4: FlatListData; -/* [Type.Utf8]*/ 5: FlatListData; -/* [Type.Bool]*/ 6: BoolData; -/* [Type.Decimal]*/ 7: FlatData; -/* [Type.Date]*/ 8: FlatData; -/* [Type.Time]*/ 9: FlatData; -/* [Type.Timestamp]*/ 10: FlatData; -/* [Type.Interval]*/ 11: FlatData; -/* [Type.List]*/ 12: ListData>; -/* [Type.Struct]*/ 13: NestedData; -/* [Type.Union]*/ 14: UnionData; -/* [Type.FixedSizeBinary]*/ 15: FlatData; -/* [Type.FixedSizeList]*/ 16: SingleNestedData; -/* [Type.Map]*/ 17: NestedData; -/* [Type.DenseUnion]*/ DenseUnion: DenseUnionData; -/*[Type.SparseUnion]*/ SparseUnion: SparseUnionData; -/*[ Type.Dictionary]*/ Dictionary: DictionaryData; -} // When slicing, we do not know the null count of the sliced range without // doing some computation. To avoid doing this eagerly, we set the null count -// to -1 (any negative number will do). When Array::null_count is called the +// to -1 (any negative number will do). When Vector.nullCount is called the // first time, the null count will be computed. See ARROW-33 -export type kUnknownNullCount = -1; -export const kUnknownNullCount = -1; +/** @ignore */ export type kUnknownNullCount = -1; +/** @ignore */ export const kUnknownNullCount = -1; -export class BaseData implements VectorLike { - public type: T; - public length: number; - public offset: number; - // @ts-ignore - public childData: Data[]; - protected _nullCount: number | kUnknownNullCount; - protected /* [VectorType.OFFSET]:*/ 0?: Int32Array; - protected /* [VectorType.DATA]:*/ 1?: T['TArray']; - protected /*[VectorType.VALIDITY]:*/ 2?: Uint8Array; - protected /* [VectorType.TYPE]:*/ 3?: Int8Array; - constructor(type: T, length: number, offset?: number, nullCount?: number) { - this.type = type; - this.length = Math.floor(Math.max(length || 0, 0)); - this.offset = Math.floor(Math.max(offset || 0, 0)); - this._nullCount = Math.floor(Math.max(nullCount || 0, -1)); - } - public get typeId() { return this.type.TType; } - public get nullBitmap() { return this[VectorType.VALIDITY]; } - public get nullCount() { - let nullCount = this._nullCount; - let nullBitmap: Uint8Array | undefined; - if (nullCount === -1 && (nullBitmap = this[VectorType.VALIDITY])) { - this._nullCount = nullCount = this.length - popcnt_bit_range(nullBitmap, this.offset, this.offset + this.length); - } - return nullCount; - } - public clone(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount): Data { - return new BaseData(type, length, offset, nullCount) as any; - } - public slice(offset: number, length: number) { - return length <= 0 ? 
this : this.sliceInternal(this.clone( - this.type, length, this.offset + offset, +(this._nullCount === 0) - 1 - ) as any, offset, length); - } - protected sliceInternal(clone: this, offset: number, length: number) { - let arr: any; - // If typeIds exist, slice the typeIds buffer - (arr = this[VectorType.TYPE]) && (clone[VectorType.TYPE] = this.sliceData(arr, offset, length)); - // If offsets exist, only slice the offsets buffer - (arr = this[VectorType.OFFSET]) && (clone[VectorType.OFFSET] = this.sliceOffsets(arr, offset, length)) || - // Otherwise if no offsets, slice the data buffer - (arr = this[VectorType.DATA]) && (clone[VectorType.DATA] = this.sliceData(arr, offset, length)); - return clone; - } - protected sliceData(data: T['TArray'] & TypedArray, offset: number, length: number) { - return data.subarray(offset, offset + length); - } - protected sliceOffsets(valueOffsets: Int32Array, offset: number, length: number) { - return valueOffsets.subarray(offset, offset + length + 1); - } -} +/** @ignore */ export type NullBuffer = Uint8Array | null | undefined; +/** @ignore */ export type TypeIdsBuffer = Int8Array | ArrayLike | Iterable; +/** @ignore */ export type ValueOffsetsBuffer = Int32Array | ArrayLike | Iterable; +/** @ignore */ export type DataBuffer = T['TArray'] | ArrayLike | Iterable; -export class FlatData extends BaseData { - public /* [VectorType.DATA]:*/ 1: T['TArray']; - public /*[VectorType.VALIDITY]:*/ 2: Uint8Array; - public get values() { return this[VectorType.DATA]; } - constructor(type: T, length: number, nullBitmap: Uint8Array | null | undefined, data: Iterable, offset?: number, nullCount?: number) { - super(type, length, offset, nullCount); - this[VectorType.DATA] = toTypedArray(this.ArrayType, data); - this[VectorType.VALIDITY] = toTypedArray(Uint8Array, nullBitmap); - } - public get ArrayType(): T['ArrayType'] { return this.type.ArrayType; } - public clone(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount) { - return new (this.constructor as any)(type, length, this[VectorType.VALIDITY], this[VectorType.DATA], offset, nullCount) as FlatData; - } +/** @ignore */ +export interface Buffers { + [BufferType.OFFSET]: Int32Array; + [BufferType.DATA]: T['TArray']; + [BufferType.VALIDITY]: Uint8Array; + [BufferType.TYPE]: T['TArray']; } -export class BoolData extends FlatData { - protected sliceData(data: Uint8Array) { return data; } +/** @ignore */ +export interface Data { + readonly TType: T['TType']; + readonly TArray: T['TArray']; + readonly TValue: T['TValue']; } -export class FlatListData extends FlatData { - public /* [VectorType.OFFSET]:*/ 0: Int32Array; - public /* [VectorType.DATA]:*/ 1: T['TArray']; - public /*[VectorType.VALIDITY]:*/ 2: Uint8Array; - public get values() { return this[VectorType.DATA]; } - public get valueOffsets() { return this[VectorType.OFFSET]; } - constructor(type: T, length: number, nullBitmap: Uint8Array | null | undefined, valueOffsets: Iterable, data: T['TArray'], offset?: number, nullCount?: number) { - super(type, length, nullBitmap, data, offset, nullCount); - this[VectorType.OFFSET] = toTypedArray(Int32Array, valueOffsets); - } - public clone(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount) { - return new FlatListData(type, length, this[VectorType.VALIDITY], this[VectorType.OFFSET], this[VectorType.DATA], offset, nullCount) as FlatListData; - } -} +/** @ignore */ +export class Data { -export class DictionaryData extends BaseData> { - protected _dictionary: Vector; - 
protected _indices: Data>; - public get indices() { return this._indices; } - public get dictionary() { return this._dictionary; } - constructor(type: Dictionary, dictionary: Vector, indices: Data>) { - super(type, indices.length, indices.offset, (indices as any)._nullCount); - this._indices = indices; - this._dictionary = dictionary; - } - public get nullCount() { return this._indices.nullCount; } - public get nullBitmap() { return this._indices.nullBitmap; } - public clone>(type: R, length = this.length, offset = this.offset) { - const data = this._dictionary.data.clone(type.dictionary as any); - return new DictionaryData( - this.type as any, - this._dictionary.clone(data) as any, - this._indices.slice(offset - this.offset, length) - ) as any; - } - protected sliceInternal(clone: this, _offset: number, _length: number) { - clone.length = clone._indices.length; - clone._nullCount = (clone._indices as any)._nullCount; - return clone; - } -} + public readonly type: T; + public readonly length: number; + public readonly offset: number; + public readonly stride: number; + public readonly childData: Data[]; + public readonly values: Buffers[BufferType.DATA]; + public readonly typeIds: Buffers[BufferType.TYPE]; + // @ts-ignore + public readonly nullBitmap: Buffers[BufferType.VALIDITY]; + // @ts-ignore + public readonly valueOffsets: Buffers[BufferType.OFFSET]; -export class NestedData extends BaseData { - public /*[VectorType.VALIDITY]:*/ 2: Uint8Array; - constructor(type: T, length: number, nullBitmap: Uint8Array | null | undefined, childData: Data[], offset?: number, nullCount?: number) { - super(type, length, offset, nullCount); - this.childData = childData; - this[VectorType.VALIDITY] = toTypedArray(Uint8Array, nullBitmap); - } - public clone(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount): Data { - return new NestedData(type, length, this[VectorType.VALIDITY], this.childData, offset, nullCount) as any; + public get ArrayType() { return this.type.ArrayType; } + public get typeId(): T['TType'] { return this.type.typeId; } + public get buffers() { + return [this.valueOffsets, this.values, this.nullBitmap, this.typeIds] as Buffers; } - protected sliceInternal(clone: this, offset: number, length: number) { - if (!this[VectorType.OFFSET]) { - clone.childData = this.childData.map((child) => child.slice(offset, length)); + + protected _nullCount: number | kUnknownNullCount; + + public get nullCount() { + let nullCount = this._nullCount; + let nullBitmap: Uint8Array | undefined; + if (nullCount <= kUnknownNullCount && (nullBitmap = this.nullBitmap)) { + this._nullCount = nullCount = this.length - popcnt_bit_range(nullBitmap, this.offset, this.offset + this.length); } - return super.sliceInternal(clone, offset, length); + return nullCount; } -} -export class SingleNestedData extends NestedData { - protected _valuesData: Data; - public get values() { return this._valuesData; } - constructor(type: T, length: number, nullBitmap: Uint8Array | null | undefined, valueChildData: Data, offset?: number, nullCount?: number) { - super(type, length, nullBitmap, [valueChildData], offset, nullCount); - this._valuesData = valueChildData; + constructor(type: T, offset: number, length: number, nullCount?: number, buffers?: Partial> | Data, childData?: (Data | Vector)[]) { + this.type = type; + this.offset = Math.floor(Math.max(offset || 0, 0)); + this.length = Math.floor(Math.max(length || 0, 0)); + this._nullCount = Math.floor(Math.max(nullCount || 0, -1)); + this.childData = 
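// normalize children: callers may pass Vectors, but childData always stores the underlying Data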
(childData || []).map((x) => x instanceof Data ? x : x.data) as Data[]; + let buffer: Buffers[keyof Buffers]; + if (buffers instanceof Data) { + this.stride = buffers.stride; + this.values = buffers.values; + this.typeIds = buffers.typeIds; + this.nullBitmap = buffers.nullBitmap; + this.valueOffsets = buffers.valueOffsets; + } else { + if (buffers) { + (buffer = (buffers as Buffers)[0]) && (this.valueOffsets = buffer); + (buffer = (buffers as Buffers)[1]) && (this.values = buffer); + (buffer = (buffers as Buffers)[2]) && (this.nullBitmap = buffer); + (buffer = (buffers as Buffers)[3]) && (this.typeIds = buffer); + } + const t: any = type; + switch (type.typeId) { + case Type.Decimal: this.stride = 4; break; + case Type.Timestamp: this.stride = 2; break; + case Type.Date: this.stride = 1 + (t as Date_).unit; break; + case Type.Interval: this.stride = 1 + (t as Interval).unit; break; + case Type.Int: this.stride = 1 + +((t as Int).bitWidth > 32); break; + case Type.Time: this.stride = 1 + +((t as Time).bitWidth > 32); break; + case Type.FixedSizeList: this.stride = (t as FixedSizeList).listSize; break; + case Type.FixedSizeBinary: this.stride = (t as FixedSizeBinary).byteWidth; break; + default: this.stride = 1; + } + } } -} -export class ListData extends SingleNestedData { - public /* [VectorType.OFFSET]:*/ 0: Int32Array; - public /*[VectorType.VALIDITY]:*/ 2: Uint8Array; - public get valueOffsets() { return this[VectorType.OFFSET]; } - constructor(type: T, length: number, nullBitmap: Uint8Array | null | undefined, valueOffsets: Iterable, valueChildData: Data, offset?: number, nullCount?: number) { - super(type, length, nullBitmap, valueChildData, offset, nullCount); - this[VectorType.OFFSET] = toTypedArray(Int32Array, valueOffsets); + public clone(type: R, offset = this.offset, length = this.length, nullCount = this._nullCount, buffers: Buffers = this, childData: (Data | Vector)[] = this.childData) { + return new Data(type, offset, length, nullCount, buffers, childData); } - public clone(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount): Data { - return new ListData(type, length, this[VectorType.VALIDITY], this[VectorType.OFFSET], this._valuesData as any, offset, nullCount) as any; - } -} -export class UnionData extends NestedData { - public /* [VectorType.TYPE]:*/ 3: T['TArray']; - public get typeIds() { return this[VectorType.TYPE]; } - public readonly typeIdToChildIndex: { [key: number]: number }; - constructor(type: T, length: number, nullBitmap: Uint8Array | null | undefined, typeIds: Iterable, childData: Data[], offset?: number, nullCount?: number) { - super(type, length, nullBitmap, childData, offset, nullCount); - this[VectorType.TYPE] = toTypedArray(Int8Array, typeIds); - this.typeIdToChildIndex = type.typeIds.reduce((typeIdToChildIndex, typeId, idx) => { - return (typeIdToChildIndex[typeId] = idx) && typeIdToChildIndex || typeIdToChildIndex; - }, Object.create(null) as { [key: number]: number }); + public slice(offset: number, length: number): Data { + // +true === 1, +false === 0, so this means + // we keep nullCount at 0 if it's already 0, + // otherwise set to the invalidated flag -1 + const { stride, typeId, childData } = this; + const nullCount = +(this._nullCount === 0) - 1; + const childStride = typeId === 16 /* FixedSizeList */ ? 
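// FixedSizeList children pack listSize values per logical row, so child slices scale by stride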
stride : 1; + const buffers = this._sliceBuffers(offset, length, stride, typeId); + return this.clone(this.type, this.offset + offset, length, nullCount, buffers, + // Don't slice children if we have value offsets (the variable-width types) + (!childData.length || this.valueOffsets) ? childData : this._sliceChildren(childData, childStride * offset, childStride * length)); } - public clone(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount): Data { - return new UnionData(type, length, this[VectorType.VALIDITY], this[VectorType.TYPE], this.childData, offset, nullCount) as any; - } -} -export class SparseUnionData extends UnionData { - constructor(type: SparseUnion, length: number, nullBitmap: Uint8Array | null | undefined, typeIds: Iterable, childData: Data[], offset?: number, nullCount?: number) { - super(type, length, nullBitmap, typeIds, childData, offset, nullCount); - } - public clone(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount): Data { - return new SparseUnionData( - type, - length, - this[VectorType.VALIDITY], - this[VectorType.TYPE], - this.childData, - offset, nullCount - ) as any; + protected _sliceBuffers(offset: number, length: number, stride: number, typeId: T['TType']): Buffers { + let arr: any, { buffers } = this; + // If typeIds exist, slice the typeIds buffer + (arr = buffers[BufferType.TYPE]) && (buffers[BufferType.TYPE] = arr.subarray(offset, offset + length)); + // If offsets exist, only slice the offsets buffer + (arr = buffers[BufferType.OFFSET]) && (buffers[BufferType.OFFSET] = arr.subarray(offset, offset + length + 1)) || + // Otherwise if no offsets, slice the data buffer. Don't slice the data vector for Booleans, since the offset goes by bits not bytes + (arr = buffers[BufferType.DATA]) && (buffers[BufferType.DATA] = typeId === 6 ? 
arr : arr.subarray(stride * offset, stride * (offset + length))); + return buffers; } -} -export class DenseUnionData extends UnionData { - public /* [VectorType.OFFSET]:*/ 0: Int32Array; - public get valueOffsets() { return this[VectorType.OFFSET]; } - constructor(type: DenseUnion, length: number, nullBitmap: Uint8Array | null | undefined, typeIds: Iterable, valueOffsets: Iterable, childData: Data[], offset?: number, nullCount?: number) { - super(type, length, nullBitmap, typeIds, childData, offset, nullCount); - this[VectorType.OFFSET] = toTypedArray(Int32Array, valueOffsets); - } - public clone(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount): Data { - return new DenseUnionData( - type, - length, - this[VectorType.VALIDITY], - this[VectorType.TYPE], - this[VectorType.OFFSET], - this.childData, - offset, nullCount - ) as any; + protected _sliceChildren(childData: Data[], offset: number, length: number): Data[] { + return childData.map((child) => child.slice(offset, length)); } -} -export class ChunkedData extends BaseData { - // @ts-ignore - protected _chunkData: Data[]; - protected _chunkVectors: Vector[]; - protected _chunkOffsets: Uint32Array; - public get chunkVectors() { return this._chunkVectors; } - public get chunkOffsets() { return this._chunkOffsets; } - public get chunkData() { - return this._chunkData || ( - this._chunkData = this._chunkVectors.map(({ data }) => data)); - } - constructor(type: T, length: number, chunkVectors: Vector[], offset?: number, nullCount?: number, chunkOffsets?: Uint32Array) { - super(type, length, offset, nullCount); - this._chunkVectors = chunkVectors; - this._chunkOffsets = chunkOffsets || ChunkedData.computeOffsets(chunkVectors); - } - public get nullCount() { - let nullCount = this._nullCount; - if (nullCount === -1) { - this._nullCount = nullCount = this._chunkVectors.reduce((x, c) => x + c.nullCount, 0); + // + // Convenience methods for creating Data instances for each of the Arrow Vector types + // + /** @nocollapse */ + public static Null(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer) { + return new Data(type, offset, length, nullCount, [undefined, undefined, toArrayBufferView(Uint8Array, nullBitmap)]); + } + /** @nocollapse */ + public static Int(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { + return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]); + } + /** @nocollapse */ + public static Dictionary(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { + return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.indices.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]); + } + /** @nocollapse */ + public static Float(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { + return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]); + } + /** @nocollapse */ + public static Bool(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { + return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]); + } + /** @nocollapse */ + public static Decimal(type: T, offset: number, length: 
number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { + return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]); + } + /** @nocollapse */ + public static Date(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { + return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]); + } + /** @nocollapse */ + public static Time(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { + return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]); + } + /** @nocollapse */ + public static Timestamp(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { + return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]); + } + /** @nocollapse */ + public static Interval(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { + return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]); + } + /** @nocollapse */ + public static FixedSizeBinary(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { + return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]); + } + /** @nocollapse */ + public static Binary(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, data: Uint8Array) { + return new Data(type, offset, length, nullCount, [toArrayBufferView(Int32Array, valueOffsets), toArrayBufferView(Uint8Array, data), toArrayBufferView(Uint8Array, nullBitmap)]); + } + /** @nocollapse */ + public static Utf8(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, data: Uint8Array) { + return new Data(type, offset, length, nullCount, [toArrayBufferView(Int32Array, valueOffsets), toArrayBufferView(Uint8Array, data), toArrayBufferView(Uint8Array, nullBitmap)]); + } + /** @nocollapse */ + public static List(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, child: Data | Vector) { + return new Data(type, offset, length, nullCount, [toArrayBufferView(Int32Array, valueOffsets), undefined, toArrayBufferView(Uint8Array, nullBitmap)], [child]); + } + /** @nocollapse */ + public static FixedSizeList(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, child: Data | Vector) { + return new Data(type, offset, length, nullCount, [undefined, undefined, toArrayBufferView(Uint8Array, nullBitmap)], [child]); + } + /** @nocollapse */ + public static Struct(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, children: (Data | Vector)[]) { + return new Data(type, offset, length, nullCount, [undefined, undefined, toArrayBufferView(Uint8Array, nullBitmap)], children); + } + /** @nocollapse */ + public static Map(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, children: (Data | Vector)[]) 
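// note: Map is laid out like Struct above, one child per named field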
{ + return new Data(type, offset, length, nullCount, [undefined, undefined, toArrayBufferView(Uint8Array, nullBitmap)], children); + } + public static Union(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, children: (Data | Vector)[]): Data; + public static Union(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, valueOffsets: ValueOffsetsBuffer, children: (Data | Vector)[]): Data; + /** @nocollapse */ + public static Union(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, valueOffsetsOrChildren: ValueOffsetsBuffer | (Data | Vector)[], children?: (Data | Vector)[]) { + const buffers = [ + undefined, undefined, + toArrayBufferView(Uint8Array, nullBitmap), + toArrayBufferView(type.ArrayType, typeIds) + ] as Partial>; + if (type.mode === UnionMode.Sparse) { + return new Data(type, offset, length, nullCount, buffers, valueOffsetsOrChildren as (Data | Vector)[]); } - return nullCount; - } - public clone(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount): Data { - return new ChunkedData( - type, length, - this._chunkVectors.map((vec) => vec.clone(vec.data.clone(type))) as any, - offset, nullCount, this._chunkOffsets - ) as any; - } - protected sliceInternal(clone: this, offset: number, length: number) { - const chunks = this._chunkVectors; - const offsets = this._chunkOffsets; - const chunkSlices: Vector[] = []; - for (let childIndex = -1, numChildren = chunks.length; ++childIndex < numChildren;) { - const child = chunks[childIndex]; - const childLength = child.length; - const childOffset = offsets[childIndex]; - // If the child is to the right of the slice boundary, exclude - if (childOffset >= offset + length) { continue; } - // If the child is to the left of of the slice boundary, exclude - if (offset >= childOffset + childLength) { continue; } - // If the child is between both left and right boundaries, include w/o slicing - if (childOffset >= offset && (childOffset + childLength) <= offset + length) { - chunkSlices.push(child); - continue; - } - // If the child overlaps one of the slice boundaries, include that slice - const begin = Math.max(0, offset - childOffset); - const end = begin + Math.min(childLength - begin, (offset + length) - childOffset); - chunkSlices.push(child.slice(begin, end)); - } - clone._chunkVectors = chunkSlices; - clone._chunkOffsets = ChunkedData.computeOffsets(chunkSlices); - return clone; - } - static computeOffsets(childVectors: Vector[]) { - const childOffsets = new Uint32Array(childVectors.length + 1); - for (let index = 0, length = childOffsets.length, childOffset = childOffsets[0] = 0; ++index < length;) { - childOffsets[index] = (childOffset += childVectors[index - 1].length); - } - return childOffsets; + buffers[BufferType.OFFSET] = toArrayBufferView(Int32Array, valueOffsetsOrChildren); + return new Data(type, offset, length, nullCount, buffers, children); } } + +((Data.prototype as any).childData = Object.freeze([])); diff --git a/js/src/enum.ts b/js/src/enum.ts new file mode 100644 index 0000000000000..0be6a4ed2938e --- /dev/null +++ b/js/src/enum.ts @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import * as Schema_ from './fb/Schema';
+import * as Message_ from './fb/Message';
+
+export import ArrowType = Schema_.org.apache.arrow.flatbuf.Type;
+export import DateUnit = Schema_.org.apache.arrow.flatbuf.DateUnit;
+export import TimeUnit = Schema_.org.apache.arrow.flatbuf.TimeUnit;
+export import Precision = Schema_.org.apache.arrow.flatbuf.Precision;
+export import UnionMode = Schema_.org.apache.arrow.flatbuf.UnionMode;
+export import VectorType = Schema_.org.apache.arrow.flatbuf.VectorType;
+export import IntervalUnit = Schema_.org.apache.arrow.flatbuf.IntervalUnit;
+export import MessageHeader = Message_.org.apache.arrow.flatbuf.MessageHeader;
+export import MetadataVersion = Schema_.org.apache.arrow.flatbuf.MetadataVersion;
+
+/**
+ * *
+ * Main data type enumeration:
+ * *
+ * Data types in this library are all *logical*. They can be expressed as
+ * either a primitive physical type (bytes or bits of some fixed size), a
+ * nested type consisting of other data types, or another data type (e.g. a
+ * timestamp encoded as an int64)
+ */
+export enum Type {
+    NONE = 0,             // The default placeholder type
+    Null = 1,             // A NULL type having no physical storage
+    Int = 2,              // Signed or unsigned 8, 16, 32, or 64-bit little-endian integer
+    Float = 3,            // 2, 4, or 8-byte floating point value
+    Binary = 4,           // Variable-length bytes (no guarantee of UTF8-ness)
+    Utf8 = 5,             // UTF8 variable-length string as List<Char>
+    Bool = 6,             // Boolean as 1 bit, LSB bit-packed ordering
+    Decimal = 7,          // Precision-and-scale-based decimal type. Storage type depends on the parameters.
+    Date = 8,             // int32_t days or int64_t milliseconds since the UNIX epoch
+    Time = 9,             // Time as signed 32 or 64-bit integer, representing either seconds, milliseconds, microseconds, or nanoseconds since midnight
+    Timestamp = 10,       // Exact timestamp encoded with int64 since UNIX epoch (Default unit millisecond)
+    Interval = 11,        // YEAR_MONTH or DAY_TIME interval in SQL style
+    List = 12,            // A list of some logical data type
+    Struct = 13,          // Struct of logical types
+    Union = 14,           // Union of logical types
+    FixedSizeBinary = 15, // Fixed-size binary. Each value occupies the same number of bytes
+    FixedSizeList = 16,   // Fixed-size list. Each value occupies the same number of bytes
+    Map = 17,             // Map of named logical types
+
+    // These enum values are here so that TypeScript can narrow the type signatures further
+    // beyond the base Arrow types. The base Arrow types include metadata like bitWidths that
+    // impact the type signatures of the values we return. For example, the Int8Vector reads
+    // 1-byte numbers from an Int8Array, an Int32Vector reads a 4-byte number from an Int32Array,
+    // and an Int64Vector reads a pair of 4-byte lo, hi int32s, and returns them as a zero-copy
+    // slice from an underlying Int32Array.
Library consumers benefit by doing this type narrowing, + // since we can ensure the types across all public methods are propagated and never bail to `any`. + // These values are _never_ actually used at runtime, and they will _never_ be written into the + // flatbuffers metadata of serialized Arrow IPC payloads. + Dictionary = -1, // Dictionary aka Category type + Int8 = -2, + Int16 = -3, + Int32 = -4, + Int64 = -5, + Uint8 = -6, + Uint16 = -7, + Uint32 = -8, + Uint64 = -9, + Float16 = -10, + Float32 = -11, + Float64 = -12, + DateDay = -13, + DateMillisecond = -14, + TimestampSecond = -15, + TimestampMillisecond = -16, + TimestampMicrosecond = -17, + TimestampNanosecond = -18, + TimeSecond = -19, + TimeMillisecond = -20, + TimeMicrosecond = -21, + TimeNanosecond = -22, + DenseUnion = -23, + SparseUnion = -24, + IntervalDayTime = -25, + IntervalYearMonth = -26, +} diff --git a/js/src/fb/Schema.ts b/js/src/fb/Schema.ts index 4a4aeb65599be..e9829d9d8348a 100644 --- a/js/src/fb/Schema.ts +++ b/js/src/fb/Schema.ts @@ -588,7 +588,7 @@ export namespace org.apache.arrow.flatbuf { * @param {Array.} data * @returns {flatbuffers.Offset} */ - static createTypeIdsVector(builder: flatbuffers.Builder, data: number[] | Uint8Array): flatbuffers.Offset { + static createTypeIdsVector(builder: flatbuffers.Builder, data: number[] | Int32Array): flatbuffers.Offset { builder.startVector(4, data.length, 4); for (let i = data.length - 1; i >= 0; i--) { builder.addInt32(data[i]); diff --git a/js/src/interfaces.ts b/js/src/interfaces.ts new file mode 100644 index 0000000000000..ae38d4e5be333 --- /dev/null +++ b/js/src/interfaces.ts @@ -0,0 +1,240 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import { Data } from './data'; +import { Type } from './enum'; +import * as type from './type'; +import { DataType } from './type'; +import * as vecs from './vector/index'; + +/** @ignore */ +export interface ArrayBufferViewConstructor { + readonly prototype: T; + new(length: number): T; + new(arrayOrArrayBuffer: ArrayLike | ArrayBufferLike): T; + new(buffer: ArrayBufferLike, byteOffset: number, length?: number): T; + /** + * The size in bytes of each element in the array. + */ + readonly BYTES_PER_ELEMENT: number; + /** + * Returns a new array from a set of elements. + * @param items A set of elements to include in the new array object. + */ + of(...items: number[]): T; + /** + * Creates an array from an array-like or iterable object. + * @param arrayLike An array-like or iterable object to convert to an array. + * @param mapfn A mapping function to call on every element of the array. + * @param thisArg Value of 'this' used to invoke the mapfn. 
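 * e.g. (illustrative, not part of this diff): Int32Array.from([1, 2, 3], (x) => x * 2) yields Int32Array [2, 4, 6]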
+     */
+    from(arrayLike: ArrayLike<number>, mapfn?: (v: number, k: number) => number, thisArg?: any): T;
+}
+
+/** @ignore */
+export type VectorCtorArgs<
+    T extends Vector<R>,
+    R extends DataType = any,
+    TArgs extends any[] = any[],
+    TCtor extends new (data: Data<R>, ...args: TArgs) => T =
+                  new (data: Data<R>, ...args: TArgs) => T
+> = TCtor extends new (data: Data<R>, ...args: infer TArgs) => T ? TArgs : never;
+
+/**
+ * Obtain the constructor function of an instance type
+ * @ignore
+ */
+export type ConstructorType<
+    T,
+    TCtor extends new (...args: any[]) => T =
+                  new (...args: any[]) => T
+> = TCtor extends new (...args: any[]) => T ? TCtor : never;
+
+/** @ignore */
+export type VectorCtorType<
+    T extends Vector<R>,
+    R extends DataType = any,
+    TCtor extends new (data: Data<R>, ...args: VectorCtorArgs<T, R>) => T =
+                  new (data: Data<R>, ...args: VectorCtorArgs<T, R>) => T
+> = TCtor extends new (data: Data<R>, ...args: VectorCtorArgs<T, R>) => T ? TCtor : never;
+
+/** @ignore */
+export type Vector<T extends Type | DataType = any> =
+    T extends Type ? TypeToVector<T> :
+    T extends DataType ? DataTypeToVector<T> :
+    never
+    ;
+
+/** @ignore */
+export type VectorCtor<T extends Type | DataType | Vector = any> =
+    T extends Vector ? VectorCtorType<T> :
+    T extends Type ? VectorCtorType<Vector<T>> :
+    T extends DataType ? VectorCtorType<Vector<T>> :
+    VectorCtorType<vecs.BaseVector>
+    ;
+
+/** @ignore */
+export type DataTypeCtor<T extends Type | DataType | Vector = any> =
+    T extends DataType ? ConstructorType<T> :
+    T extends Vector ? ConstructorType<T['type']> :
+    T extends Type ? ConstructorType<TypeToDataType<T>> :
+    never
+    ;
+
+/** @ignore */
+type TypeToVector<T extends Type> =
+    T extends Type.Null ? vecs.NullVector :
+    T extends Type.Bool ? vecs.BoolVector :
+    T extends Type.Int8 ? vecs.Int8Vector :
+    T extends Type.Int16 ? vecs.Int16Vector :
+    T extends Type.Int32 ? vecs.Int32Vector :
+    T extends Type.Int64 ? vecs.Int64Vector :
+    T extends Type.Uint8 ? vecs.Uint8Vector :
+    T extends Type.Uint16 ? vecs.Uint16Vector :
+    T extends Type.Uint32 ? vecs.Uint32Vector :
+    T extends Type.Uint64 ? vecs.Uint64Vector :
+    T extends Type.Int ? vecs.IntVector :
+    T extends Type.Float16 ? vecs.Float16Vector :
+    T extends Type.Float32 ? vecs.Float32Vector :
+    T extends Type.Float64 ? vecs.Float64Vector :
+    T extends Type.Float ? vecs.FloatVector :
+    T extends Type.Utf8 ? vecs.Utf8Vector :
+    T extends Type.Binary ? vecs.BinaryVector :
+    T extends Type.FixedSizeBinary ? vecs.FixedSizeBinaryVector :
+    T extends Type.Date ? vecs.DateVector :
+    T extends Type.DateDay ? vecs.DateDayVector :
+    T extends Type.DateMillisecond ? vecs.DateMillisecondVector :
+    T extends Type.Timestamp ? vecs.TimestampVector :
+    T extends Type.TimestampSecond ? vecs.TimestampSecondVector :
+    T extends Type.TimestampMillisecond ? vecs.TimestampMillisecondVector :
+    T extends Type.TimestampMicrosecond ? vecs.TimestampMicrosecondVector :
+    T extends Type.TimestampNanosecond ? vecs.TimestampNanosecondVector :
+    T extends Type.Time ? vecs.TimeVector :
+    T extends Type.TimeSecond ? vecs.TimeSecondVector :
+    T extends Type.TimeMillisecond ? vecs.TimeMillisecondVector :
+    T extends Type.TimeMicrosecond ? vecs.TimeMicrosecondVector :
+    T extends Type.TimeNanosecond ? vecs.TimeNanosecondVector :
+    T extends Type.Decimal ? vecs.DecimalVector :
+    T extends Type.Union ? vecs.UnionVector :
+    T extends Type.DenseUnion ? vecs.DenseUnionVector :
+    T extends Type.SparseUnion ? vecs.SparseUnionVector :
+    T extends Type.Interval ? vecs.IntervalVector :
+    T extends Type.IntervalDayTime ? vecs.IntervalDayTimeVector :
+    T extends Type.IntervalYearMonth ? vecs.IntervalYearMonthVector :
+    T extends Type.Map ? vecs.MapVector :
+    T extends Type.List ? vecs.ListVector :
+    T extends Type.Struct ? vecs.StructVector :
+    T extends Type.Dictionary ? vecs.DictionaryVector :
+    T extends Type.FixedSizeList ? vecs.FixedSizeListVector :
+    vecs.BaseVector
+    ;
+
+/** @ignore */
+type DataTypeToVector<T extends DataType> =
+    T extends type.Null ? vecs.NullVector :
+    T extends type.Bool ? vecs.BoolVector :
+    T extends type.Int8 ? vecs.Int8Vector :
+    T extends type.Int16 ? vecs.Int16Vector :
+    T extends type.Int32 ? vecs.Int32Vector :
+    T extends type.Int64 ? vecs.Int64Vector :
+    T extends type.Uint8 ? vecs.Uint8Vector :
+    T extends type.Uint16 ? vecs.Uint16Vector :
+    T extends type.Uint32 ? vecs.Uint32Vector :
+    T extends type.Uint64 ? vecs.Uint64Vector :
+    T extends type.Int ? vecs.IntVector :
+    T extends type.Float16 ? vecs.Float16Vector :
+    T extends type.Float32 ? vecs.Float32Vector :
+    T extends type.Float64 ? vecs.Float64Vector :
+    T extends type.Float ? vecs.FloatVector :
+    T extends type.Utf8 ? vecs.Utf8Vector :
+    T extends type.Binary ? vecs.BinaryVector :
+    T extends type.FixedSizeBinary ? vecs.FixedSizeBinaryVector :
+    T extends type.Date_ ? vecs.DateVector :
+    T extends type.DateDay ? vecs.DateDayVector :
+    T extends type.DateMillisecond ? vecs.DateMillisecondVector :
+    T extends type.Timestamp ? vecs.TimestampVector :
+    T extends type.TimestampSecond ? vecs.TimestampSecondVector :
+    T extends type.TimestampMillisecond ? vecs.TimestampMillisecondVector :
+    T extends type.TimestampMicrosecond ? vecs.TimestampMicrosecondVector :
+    T extends type.TimestampNanosecond ? vecs.TimestampNanosecondVector :
+    T extends type.Time ? vecs.TimeVector :
+    T extends type.TimeSecond ? vecs.TimeSecondVector :
+    T extends type.TimeMillisecond ? vecs.TimeMillisecondVector :
+    T extends type.TimeMicrosecond ? vecs.TimeMicrosecondVector :
+    T extends type.TimeNanosecond ? vecs.TimeNanosecondVector :
+    T extends type.Decimal ? vecs.DecimalVector :
+    T extends type.Union ? vecs.UnionVector :
+    T extends type.DenseUnion ? vecs.DenseUnionVector :
+    T extends type.SparseUnion ? vecs.SparseUnionVector :
+    T extends type.Interval ? vecs.IntervalVector :
+    T extends type.IntervalDayTime ? vecs.IntervalDayTimeVector :
+    T extends type.IntervalYearMonth ? vecs.IntervalYearMonthVector :
+    T extends type.Map_ ? vecs.MapVector :
+    T extends type.List ? vecs.ListVector :
+    T extends type.Struct ? vecs.StructVector :
+    T extends type.Dictionary ? vecs.DictionaryVector :
+    T extends type.FixedSizeList ? vecs.FixedSizeListVector :
+    vecs.BaseVector
+    ;
+
+/** @ignore */
+type TypeToDataType<T extends Type> =
+      T extends Type.Null ? type.Null
+    : T extends Type.Bool ? type.Bool
+    : T extends Type.Int ? type.Int
+    : T extends Type.Int16 ? type.Int16
+    : T extends Type.Int32 ? type.Int32
+    : T extends Type.Int64 ? type.Int64
+    : T extends Type.Uint8 ? type.Uint8
+    : T extends Type.Uint16 ? type.Uint16
+    : T extends Type.Uint32 ? type.Uint32
+    : T extends Type.Uint64 ? type.Uint64
+    : T extends Type.Int8 ? type.Int8
+    : T extends Type.Float16 ? type.Float16
+    : T extends Type.Float32 ? type.Float32
+    : T extends Type.Float64 ? type.Float64
+    : T extends Type.Float ? type.Float
+    : T extends Type.Utf8 ? type.Utf8
+    : T extends Type.Binary ? type.Binary
+    : T extends Type.FixedSizeBinary ? type.FixedSizeBinary
+    : T extends Type.Date ? type.Date_
+    : T extends Type.DateDay ? type.DateDay
+    : T extends Type.DateMillisecond ? type.DateMillisecond
+    : T extends Type.Timestamp ? type.Timestamp
+    : T extends Type.TimestampSecond ? type.TimestampSecond
+    : T extends Type.TimestampMillisecond ? type.TimestampMillisecond
+    : T extends Type.TimestampMicrosecond ?
type.TimestampMicrosecond + : T extends Type.TimestampNanosecond ? type.TimestampNanosecond + : T extends Type.Time ? type.Time + : T extends Type.TimeSecond ? type.TimeSecond + : T extends Type.TimeMillisecond ? type.TimeMillisecond + : T extends Type.TimeMicrosecond ? type.TimeMicrosecond + : T extends Type.TimeNanosecond ? type.TimeNanosecond + : T extends Type.Decimal ? type.Decimal + : T extends Type.Union ? type.Union + : T extends Type.DenseUnion ? type.DenseUnion + : T extends Type.SparseUnion ? type.SparseUnion + : T extends Type.Interval ? type.Interval + : T extends Type.IntervalDayTime ? type.IntervalDayTime + : T extends Type.IntervalYearMonth ? type.IntervalYearMonth + : T extends Type.Map ? type.Map_ + : T extends Type.List ? type.List + : T extends Type.Struct ? type.Struct + : T extends Type.Dictionary ? type.Dictionary + : T extends Type.FixedSizeList ? type.FixedSizeList + : DataType + ; diff --git a/js/src/io/adapters.ts b/js/src/io/adapters.ts new file mode 100644 index 0000000000000..427fc29ab2228 --- /dev/null +++ b/js/src/io/adapters.ts @@ -0,0 +1,386 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
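Reviewer note: the negative `Type` members introduced above exist purely at the type level, as inputs to conditional types like `TypeToVector`. A minimal, self-contained sketch of that narrowing technique follows; the `MiniType` enum and `Mini*Vector` classes are hypothetical stand-ins for illustration, not this library's API:

```ts
// Hypothetical stand-ins that model the pattern only.
enum MiniType { Utf8 = 5, Int8 = -2 }

class MiniInt8Vector { public get(_i: number): number { return 0; } }
class MiniUtf8Vector { public get(_i: number): string { return ''; } }

// Same shape as TypeToVector: map an enum literal to a concrete vector
// class at compile time, so call sites never widen to `any`.
type MiniTypeToVector<T extends MiniType> =
    T extends MiniType.Int8 ? MiniInt8Vector :
    T extends MiniType.Utf8 ? MiniUtf8Vector :
    never;

function vectorFor<T extends MiniType>(t: T): MiniTypeToVector<T> {
    const Ctor = t === MiniType.Int8 ? MiniInt8Vector : MiniUtf8Vector;
    return new Ctor() as MiniTypeToVector<T>;
}

const ints = vectorFor(MiniType.Int8); // statically typed as MiniInt8Vector
const strs = vectorFor(MiniType.Utf8); // statically typed as MiniUtf8Vector
console.log(ints.get(0), strs.get(0));
```

The compiler infers `T` as the enum *literal* at each call site, which is exactly why the enum needs one distinct member per physical variant (Int8 vs. Int32, DateDay vs. DateMillisecond, and so on).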
+ +import { + toUint8Array, + joinUint8Arrays, + ArrayBufferViewInput, + toUint8ArrayIterator, + toUint8ArrayAsyncIterator +} from '../util/buffer'; + +import { ReadableDOMStreamOptions } from './interfaces'; + +/** @ignore */ +export default { + fromIterable(source: Iterable | T): IterableIterator { + return pump(fromIterable(source)); + }, + fromAsyncIterable(source: AsyncIterable | PromiseLike): AsyncIterableIterator { + return pump(fromAsyncIterable(source)); + }, + fromDOMStream(source: ReadableStream): AsyncIterableIterator { + return pump(fromDOMStream(source)); + }, + fromNodeStream(stream: NodeJS.ReadableStream): AsyncIterableIterator { + return pump(fromNodeStream(stream)); + }, + // @ts-ignore + toDOMStream(source: Iterable | AsyncIterable, options?: ReadableDOMStreamOptions): ReadableStream { + throw new Error(`"toDOMStream" not available in this environment`); + }, + // @ts-ignore + toNodeStream(source: Iterable | AsyncIterable, options?: import('stream').ReadableOptions): import('stream').Readable { + throw new Error(`"toNodeStream" not available in this environment`); + }, +}; + +/** @ignore */ +const pump = | AsyncIterator>(iterator: T) => { iterator.next(); return iterator; }; + +/** @ignore */ +function* fromIterable(source: Iterable | T): IterableIterator { + + let done: boolean, threw = false; + let buffers: Uint8Array[] = [], buffer: Uint8Array; + let cmd: 'peek' | 'read', size: number, bufferLength = 0; + + function byteRange() { + if (cmd === 'peek') { + return joinUint8Arrays(buffers, size)[0]; + } + [buffer, buffers, bufferLength] = joinUint8Arrays(buffers, size); + return buffer; + } + + // Yield so the caller can inject the read command before creating the source Iterator + ({ cmd, size } = yield null); + + // initialize the iterator + let it = toUint8ArrayIterator(source)[Symbol.iterator](); + + try { + do { + // read the next value + ({ done, value: buffer } = isNaN(size - bufferLength) ? + it.next(undefined) : it.next(size - bufferLength)); + // if chunk is not null or empty, push it onto the queue + if (!done && buffer.byteLength > 0) { + buffers.push(buffer); + bufferLength += buffer.byteLength; + } + // If we have enough bytes in our buffer, yield chunks until we don't + if (done || size <= bufferLength) { + do { + ({ cmd, size } = yield byteRange()); + } while (size < bufferLength); + } + } while (!done); + } catch (e) { + (threw = true) && (typeof it.throw === 'function') && (it.throw(e)); + } finally { + (threw === false) && (typeof it.return === 'function') && (it.return()); + } +} + +/** @ignore */ +async function* fromAsyncIterable(source: AsyncIterable | PromiseLike): AsyncIterableIterator { + + let done: boolean, threw = false; + let buffers: Uint8Array[] = [], buffer: Uint8Array; + let cmd: 'peek' | 'read', size: number, bufferLength = 0; + + function byteRange() { + if (cmd === 'peek') { + return joinUint8Arrays(buffers, size)[0]; + } + [buffer, buffers, bufferLength] = joinUint8Arrays(buffers, size); + return buffer; + } + + // Yield so the caller can inject the read command before creating the source AsyncIterator + ({ cmd, size } = yield null); + + // initialize the iterator + let it = toUint8ArrayAsyncIterator(source)[Symbol.asyncIterator](); + + try { + do { + // read the next value + ({ done, value: buffer } = isNaN(size - bufferLength) + ? 
await it.next(undefined) + : await it.next(size - bufferLength)); + // if chunk is not null or empty, push it onto the queue + if (!done && buffer.byteLength > 0) { + buffers.push(buffer); + bufferLength += buffer.byteLength; + } + // If we have enough bytes in our buffer, yield chunks until we don't + if (done || size <= bufferLength) { + do { + ({ cmd, size } = yield byteRange()); + } while (size < bufferLength); + } + } while (!done); + } catch (e) { + (threw = true) && (typeof it.throw === 'function') && (await it.throw(e)); + } finally { + (threw === false) && (typeof it.return === 'function') && (await it.return()); + } +} + +// All this manual Uint8Array chunk management can be avoided if/when engines +// add support for ArrayBuffer.transfer() or ArrayBuffer.prototype.realloc(): +// https://github.com/domenic/proposal-arraybuffer-transfer +/** @ignore */ +async function* fromDOMStream(source: ReadableStream): AsyncIterableIterator { + + let done = false, threw = false; + let buffers: Uint8Array[] = [], buffer: Uint8Array; + let cmd: 'peek' | 'read', size: number, bufferLength = 0; + + function byteRange() { + if (cmd === 'peek') { + return joinUint8Arrays(buffers, size)[0]; + } + [buffer, buffers, bufferLength] = joinUint8Arrays(buffers, size); + return buffer; + } + + // Yield so the caller can inject the read command before we establish the ReadableStream lock + ({ cmd, size } = yield null); + + // initialize the reader and lock the stream + let it = new AdaptiveByteReader(source); + + try { + do { + // read the next value + ({ done, value: buffer } = isNaN(size - bufferLength) + ? await it['read'](undefined) + : await it['read'](size - bufferLength)); + // if chunk is not null or empty, push it onto the queue + if (!done && buffer.byteLength > 0) { + buffers.push(toUint8Array(buffer)); + bufferLength += buffer.byteLength; + } + // If we have enough bytes in our buffer, yield chunks until we don't + if (done || size <= bufferLength) { + do { + ({ cmd, size } = yield byteRange()); + } while (size < bufferLength); + } + } while (!done); + } catch (e) { + (threw = true) && (await it['cancel'](e)); + } finally { + (threw === false) ? (await it['cancel']()) + : source['locked'] && it.releaseLock(); + } +} + +/** @ignore */ +class AdaptiveByteReader { + + private supportsBYOB: boolean; + private byobReader: ReadableStreamBYOBReader | null = null; + private defaultReader: ReadableStreamDefaultReader | null = null; + private reader: ReadableStreamBYOBReader | ReadableStreamDefaultReader | null; + + constructor(private source: ReadableStream) { + try { + this.supportsBYOB = !!(this.reader = this.getBYOBReader()); + } catch (e) { + this.supportsBYOB = !!!(this.reader = this.getDefaultReader()); + } + } + + get closed(): Promise { + return this.reader ? this.reader['closed'].catch(() => {}) : Promise.resolve(); + } + + releaseLock(): void { + if (this.reader) { + this.reader.releaseLock(); + } + this.reader = this.byobReader = this.defaultReader = null; + } + + async cancel(reason?: any): Promise { + const { reader, source } = this; + reader && (await reader['cancel'](reason)); + source && (source['locked'] && this.releaseLock()); + } + + async read(size?: number): Promise> { + if (size === 0) { + return { done: this.reader == null, value: new Uint8Array(0) }; + } + const result = !this.supportsBYOB || typeof size !== 'number' + ? 
await this.getDefaultReader().read() + : await this.readFromBYOBReader(size); + !result.done && (result.value = toUint8Array(result as ReadableStreamReadResult)); + return result as ReadableStreamReadResult; + } + + private getDefaultReader() { + if (this.byobReader) { this.releaseLock(); } + if (!this.defaultReader) { + this.defaultReader = this.source['getReader'](); + // We have to catch and swallow errors here to avoid uncaught promise rejection exceptions + // that seem to be raised when we call `releaseLock()` on this reader. I'm still mystified + // about why these errors are raised, but I'm sure there's some important spec reason that + // I haven't considered. I hate to employ such an anti-pattern here, but it seems like the + // only solution in this case :/ + this.defaultReader['closed'].catch(() => {}); + } + return (this.reader = this.defaultReader); + } + + private getBYOBReader() { + if (this.defaultReader) { this.releaseLock(); } + if (!this.byobReader) { + this.byobReader = this.source['getReader']({ mode: 'byob' }); + // We have to catch and swallow errors here to avoid uncaught promise rejection exceptions + // that seem to be raised when we call `releaseLock()` on this reader. I'm still mystified + // about why these errors are raised, but I'm sure there's some important spec reason that + // I haven't considered. I hate to employ such an anti-pattern here, but it seems like the + // only solution in this case :/ + this.byobReader['closed'].catch(() => {}); + } + return (this.reader = this.byobReader); + } + + // This strategy plucked from the example in the streams spec: + // https://streams.spec.whatwg.org/#example-manual-read-bytes + private async readFromBYOBReader(size: number) { + return await readInto(this.getBYOBReader(), new ArrayBuffer(size), 0, size); + } +} + +/** @ignore */ +async function readInto(reader: ReadableStreamBYOBReader, buffer: ArrayBufferLike, offset: number, size: number): Promise> { + if (offset >= size) { + return { done: false, value: new Uint8Array(buffer, 0, size) }; + } + const { done, value } = await reader.read(new Uint8Array(buffer, offset, size - offset)); + if (((offset += value.byteLength) < size) && !done) { + return await readInto(reader, value.buffer, offset, size); + } + return { done, value: new Uint8Array(value.buffer, 0, offset) }; +} + +/** @ignore */ +type EventName = 'end' | 'error' | 'readable'; +/** @ignore */ +type Event = [EventName, (_: any) => void, Promise<[EventName, Error | null]>]; +/** @ignore */ +const onEvent = (stream: NodeJS.ReadableStream, event: T) => { + let handler = (_: any) => resolve([event, _]); + let resolve: (value?: [T, any] | PromiseLike<[T, any]>) => void; + return [event, handler, new Promise<[T, any]>( + (r) => (resolve = r) && stream['once'](event, handler) + )] as Event; +}; + +/** @ignore */ +async function* fromNodeStream(stream: NodeJS.ReadableStream): AsyncIterableIterator { + + let events: Event[] = []; + let event: EventName = 'error'; + let done = false, err: Error | null = null; + let cmd: 'peek' | 'read', size: number, bufferLength = 0; + let buffers: Uint8Array[] = [], buffer: Uint8Array | Buffer | string; + + function byteRange() { + if (cmd === 'peek') { + return joinUint8Arrays(buffers, size)[0]; + } + [buffer, buffers, bufferLength] = joinUint8Arrays(buffers, size); + return buffer; + } + + // Yield so the caller can inject the read command before we + // add the listener for the source stream's 'readable' event. 
+ ({ cmd, size } = yield null); + + // ignore stdin if it's a TTY + if ((stream as any)['isTTY']) { return yield new Uint8Array(0); } + + try { + // initialize the stream event handlers + events[0] = onEvent(stream, 'end'); + events[1] = onEvent(stream, 'error'); + + do { + events[2] = onEvent(stream, 'readable'); + + // wait on the first message event from the stream + [event, err] = await Promise.race(events.map((x) => x[2])); + + // if the stream emitted an Error, rethrow it + if (event === 'error') { break; } + if (!(done = event === 'end')) { + // If the size is NaN, request to read everything in the stream's internal buffer + if (!isFinite(size - bufferLength)) { + buffer = toUint8Array(stream['read'](undefined)); + } else { + buffer = toUint8Array(stream['read'](size - bufferLength)); + // If the byteLength is 0, then the requested amount is more than the stream has + // in its internal buffer. In this case the stream needs a "kick" to tell it to + // continue emitting readable events, so request to read everything the stream + // has in its internal buffer right now. + if (buffer.byteLength < (size - bufferLength)) { + buffer = toUint8Array(stream['read'](undefined)); + } + } + // if chunk is not null or empty, push it onto the queue + if (buffer.byteLength > 0) { + buffers.push(buffer); + bufferLength += buffer.byteLength; + } + } + // If we have enough bytes in our buffer, yield chunks until we don't + if (done || size <= bufferLength) { + do { + ({ cmd, size } = yield byteRange()); + } while (size < bufferLength); + } + } while (!done); + } finally { + await cleanup(events, event === 'error' ? err : null); + } + + function cleanup(events: Event[], err?: T) { + buffer = buffers = null; + return new Promise(async (resolve, reject) => { + for (const [evt, fn] of events) { + stream['off'](evt, fn); + } + try { + // Some stream implementations don't call the destroy callback, + // because it's really a node-internal API. Just calling `destroy` + // here should be enough to conform to the ReadableStream contract + const destroy = (stream as any)['destroy']; + destroy && destroy.call(stream, err); + err = undefined; + } catch (e) { err = e || err; } finally { + err != null ? reject(err) : resolve(); + } + }); + } +} diff --git a/js/src/io/file.ts b/js/src/io/file.ts new file mode 100644 index 0000000000000..d88bc5f6f4e56 --- /dev/null +++ b/js/src/io/file.ts @@ -0,0 +1,116 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
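All four adapters above share one protocol: the generator first yields `null` so `pump` can prime it with a bare `next()`, after which every `next({ cmd, size })` call injects a `'peek'` or `'read'` command back into the generator. A simplified, self-contained sketch of that handshake, assuming a single in-memory chunk instead of the buffered multi-chunk sources the real adapters manage:

```ts
type Command = { cmd: 'peek' | 'read'; size: number };

// One in-memory chunk stands in for a real byte source.
function* byteSource(data: Uint8Array): Generator<Uint8Array | null, void, Command> {
    let position = 0;
    // First yield carries no data; it lets the caller prime the generator
    // (as `pump` does) so the next `next()` can deliver a command.
    let { cmd, size } = yield null;
    while (position < data.byteLength) {
        const end = Math.min(data.byteLength, position + size);
        const chunk = data.subarray(position, end);
        if (cmd === 'read') { position = end; } // 'peek' does not consume
        ({ cmd, size } = yield chunk);
    }
}

const it = byteSource(Uint8Array.of(1, 2, 3, 4));
it.next();                                            // prime, as pump() does
console.log(it.next({ cmd: 'peek', size: 2 }).value); // [1, 2], not consumed
console.log(it.next({ cmd: 'read', size: 2 }).value); // [1, 2], consumed
console.log(it.next({ cmd: 'read', size: 2 }).value); // [3, 4]
```

Driving the source with commands rather than plain iteration is what lets `peek(size)` and `read(size)` share one underlying iterator without re-buffering.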
+ +import { FileHandle } from './interfaces'; +import { ByteStream, AsyncByteStream } from './stream'; +import { ArrayBufferViewInput, toUint8Array } from '../util/buffer'; + +/** @ignore */ +export class RandomAccessFile extends ByteStream { + public size: number; + public position: number = 0; + protected buffer: Uint8Array | null; + constructor(buffer: ArrayBufferViewInput, byteLength?: number) { + super(); + this.buffer = toUint8Array(buffer); + this.size = typeof byteLength === 'undefined' ? this.buffer.byteLength : byteLength; + } + public readInt32(position: number) { + const { buffer, byteOffset } = this.readAt(position, 4); + return new DataView(buffer, byteOffset).getInt32(0, true); + } + public seek(position: number) { + this.position = Math.min(position, this.size); + return position < this.size; + } + public read(nBytes?: number | null) { + const { buffer, size, position } = this; + if (buffer && position < size) { + if (typeof nBytes !== 'number') { nBytes = Infinity; } + this.position = Math.min(size, + position + Math.min(size - position, nBytes)); + return buffer.subarray(position, this.position); + } + return null; + } + public readAt(position: number, nBytes: number) { + const buf = this.buffer; + const end = Math.min(this.size, position + nBytes); + return buf ? buf.subarray(position, end) : new Uint8Array(nBytes); + } + public close() { this.buffer && (this.buffer = null); } + public throw(value?: any) { this.close(); return { done: true, value }; } + public return(value?: any) { this.close(); return { done: true, value }; } +} + +/** @ignore */ +export class AsyncRandomAccessFile extends AsyncByteStream { + // @ts-ignore + public size: number; + public position: number = 0; + public _pending?: Promise; + protected _handle: FileHandle | null; + constructor(file: FileHandle, byteLength?: number) { + super(); + this._handle = file; + if (typeof byteLength === 'number') { + this.size = byteLength; + } else { + this._pending = (async () => { + delete this._pending; + this.size = (await file.stat()).size; + })(); + } + } + public async readInt32(position: number) { + const { buffer, byteOffset } = await this.readAt(position, 4); + return new DataView(buffer, byteOffset).getInt32(0, true); + } + public async seek(position: number) { + this._pending && await this._pending; + this.position = Math.min(position, this.size); + return position < this.size; + } + public async read(nBytes?: number | null) { + this._pending && await this._pending; + const { _handle: file, size, position } = this; + if (file && position < size) { + if (typeof nBytes !== 'number') { nBytes = Infinity; } + let pos = position, offset = 0, bytesRead = 0; + let end = Math.min(size, pos + Math.min(size - pos, nBytes)); + let buffer = new Uint8Array(Math.max(0, (this.position = end) - pos)); + while ((pos += bytesRead) < end && (offset += bytesRead) < buffer.byteLength) { + ({ bytesRead } = await file.read(buffer, offset, buffer.byteLength - offset, pos)); + } + return buffer; + } + return null; + } + public async readAt(position: number, nBytes: number) { + this._pending && await this._pending; + const { _handle: file, size } = this; + if (file && (position + nBytes) < size) { + const end = Math.min(size, position + nBytes); + const buffer = new Uint8Array(end - position); + return (await file.read(buffer, 0, nBytes, position)).buffer; + } + return new Uint8Array(nBytes); + } + public async close() { const f = this._handle; this._handle = null; f && await f.close(); } + public async throw(value?: any) { 
await this.close(); return { done: true, value }; } + public async return(value?: any) { await this.close(); return { done: true, value }; } +} diff --git a/js/src/io/interfaces.ts b/js/src/io/interfaces.ts new file mode 100644 index 0000000000000..9892562e0c0ec --- /dev/null +++ b/js/src/io/interfaces.ts @@ -0,0 +1,180 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import streamAdapters from './adapters'; + +/** @ignore */ +export const ITERATOR_DONE: any = Object.freeze({ done: true, value: void (0) }); + +/** @ignore */ +export type FileHandle = import('fs').promises.FileHandle; +/** @ignore */ +export type ArrowJSONLike = { schema: any; batches?: any[]; dictionaries?: any[]; }; +/** @ignore */ +export type ReadableDOMStreamOptions = { type: 'bytes' | undefined, autoAllocateChunkSize?: number, highWaterMark?: number }; + +/** @ignore */ +export class ArrowJSON { + // @ts-ignore + constructor(private _json: ArrowJSONLike) {} + public get schema(): any { return this._json['schema']; } + public get batches(): any[] { return (this._json['batches'] || []) as any[]; } + public get dictionaries(): any[] { return (this._json['dictionaries'] || []) as any[]; } +} + +/** @ignore */ +export interface Readable { + + readonly closed: Promise; + cancel(reason?: any): Promise; + + read(size?: number | null): Promise; + peek(size?: number | null): Promise; + throw(value?: any): Promise>; + return(value?: any): Promise>; + next(size?: number | null): Promise>; +} + +/** @ignore */ +export interface Writable { + readonly closed: Promise; + close(): void; + write(chunk: T): void; + abort(reason?: any): void; +} + +/** @ignore */ +export interface ReadableWritable extends Readable, Writable { + [Symbol.asyncIterator](): AsyncIterableIterator; + toDOMStream(options?: ReadableDOMStreamOptions): ReadableStream; + toNodeStream(options?: import('stream').ReadableOptions): import('stream').Readable; +} + +/** @ignore */ +export abstract class ReadableInterop { + + public abstract toDOMStream(options?: ReadableDOMStreamOptions): ReadableStream; + public abstract toNodeStream(options?: import('stream').ReadableOptions): import('stream').Readable; + + public tee(): [ReadableStream, ReadableStream] { + return this._getDOMStream().tee(); + } + public pipe(writable: R, options?: { end?: boolean; }) { + return this._getNodeStream().pipe(writable, options); + } + public pipeTo(writable: WritableStream, options?: PipeOptions) { return this._getDOMStream().pipeTo(writable, options); } + public pipeThrough>(duplex: { writable: WritableStream, readable: R }, options?: PipeOptions) { + return this._getDOMStream().pipeThrough(duplex, options); + } + + private _DOMStream?: ReadableStream; + private _getDOMStream() { + return this._DOMStream || (this._DOMStream = 
this.toDOMStream()); + } + + private _nodeStream?: import('stream').Readable; + private _getNodeStream() { + return this._nodeStream || (this._nodeStream = this.toNodeStream()); + } +} + +/** @ignore */ +type Resolution = { resolve: (value?: T | PromiseLike) => void; reject: (reason?: any) => void; }; + +/** @ignore */ +export class AsyncQueue extends ReadableInterop + implements AsyncIterableIterator, ReadableWritable { + + protected _values: TWritable[] = []; + protected _error?: { error: any; }; + protected _closedPromise: Promise; + protected _closedPromiseResolve?: (value?: any) => void; + protected resolvers: Resolution>[] = []; + + constructor() { + super(); + this._closedPromise = new Promise((r) => this._closedPromiseResolve = r); + } + + public get closed(): Promise { return this._closedPromise; } + public async cancel(reason?: any) { await this.return(reason); } + public write(value: TWritable) { + if (this._ensureOpen()) { + this.resolvers.length <= 0 + ? (this._values.push(value)) + : (this.resolvers.shift()!.resolve({ done: false, value } as any)); + } + } + public abort(value?: any) { + if (this._closedPromiseResolve) { + this.resolvers.length <= 0 + ? (this._error = { error: value }) + : (this.resolvers.shift()!.reject({ done: true, value })); + } + } + public close() { + if (this._closedPromiseResolve) { + const { resolvers } = this; + while (resolvers.length > 0) { + resolvers.shift()!.resolve(ITERATOR_DONE); + } + this._closedPromiseResolve(); + this._closedPromiseResolve = undefined; + } + } + + public [Symbol.asyncIterator]() { return this; } + public toDOMStream(options?: ReadableDOMStreamOptions) { + return streamAdapters.toDOMStream( + (this._closedPromiseResolve || this._error) + ? (this as AsyncIterable) + : (this._values as any) as Iterable, + options); + } + public toNodeStream(options?: import('stream').ReadableOptions) { + return streamAdapters.toNodeStream( + (this._closedPromiseResolve || this._error) + ? (this as AsyncIterable) + : (this._values as any) as Iterable, + options); + } + public async throw(_?: any) { await this.abort(_); return ITERATOR_DONE; } + public async return(_?: any) { await this.close(); return ITERATOR_DONE; } + + public async read(size?: number | null): Promise { return (await this.next(size, 'read')).value; } + public async peek(size?: number | null): Promise { return (await this.next(size, 'peek')).value; } + public next(..._args: any[]): Promise> { + if (this._values.length > 0) { + return Promise.resolve({ done: false, value: this._values.shift()! } as any); + } else if (this._error) { + return Promise.reject({ done: true, value: this._error.error }); + } else if (!this._closedPromiseResolve) { + return Promise.resolve(ITERATOR_DONE); + } else { + return new Promise>((resolve, reject) => { + this.resolvers.push({ resolve, reject }); + }); + } + } + + protected _ensureOpen() { + if (this._closedPromiseResolve) { + return true; + } + throw new Error(`${this} is closed`); + } +} diff --git a/js/src/io/stream.ts b/js/src/io/stream.ts new file mode 100644 index 0000000000000..2fe686532a5e5 --- /dev/null +++ b/js/src/io/stream.ts @@ -0,0 +1,158 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import streamAdapters from './adapters'; +import { decodeUtf8 } from '../util/utf8'; +import { ITERATOR_DONE, Readable, Writable, AsyncQueue } from './interfaces'; +import { toUint8Array, joinUint8Arrays, ArrayBufferViewInput } from '../util/buffer'; + +import { + isPromise, isFetchResponse, + isIterable, isAsyncIterable, + isReadableDOMStream, isReadableNodeStream +} from '../util/compat'; + +/** @ignore */ +export type WritableSink = Writable | WritableStream | NodeJS.WritableStream | null; +/** @ignore */ +export type ReadableSource = Readable | PromiseLike | AsyncIterable | ReadableStream | NodeJS.ReadableStream | null; + +/** @ignore */ +export class AsyncByteQueue extends AsyncQueue { + public write(value: ArrayBufferViewInput | Uint8Array) { + if ((value = toUint8Array(value)).byteLength > 0) { + return super.write(value as T); + } + } + public toString(sync: true): string; + public toString(sync?: false): Promise; + public toString(sync = false) { + return sync + ? decodeUtf8(this.toUint8Array(true)) + : this.toUint8Array(false).then(decodeUtf8); + } + public toUint8Array(sync: true): Uint8Array; + public toUint8Array(sync?: false): Promise; + public toUint8Array(sync = false) { + return sync ? joinUint8Arrays(this._values as any[])[0] : (async () => { + let buffers = [], byteLength = 0; + for await (const chunk of this) { + buffers.push(chunk); + byteLength += chunk.byteLength; + } + return joinUint8Arrays(buffers, byteLength)[0]; + })(); + } +} + +/** @ignore */ +export class ByteStream implements IterableIterator { + // @ts-ignore + private source: ByteStreamSource; + constructor(source?: Iterable | ArrayBufferViewInput) { + if (source) { + this.source = new ByteStreamSource(streamAdapters.fromIterable(source)); + } + } + [Symbol.iterator]() { return this; } + public next(value?: any) { return this.source.next(value); } + public throw(value?: any) { return this.source.throw(value); } + public return(value?: any) { return this.source.return(value); } + public peek(size?: number | null) { return this.source.peek(size); } + public read(size?: number | null) { return this.source.read(size); } +} + +/** @ignore */ +export class AsyncByteStream implements Readable, AsyncIterableIterator { + // @ts-ignore + private source: AsyncByteStreamSource; + constructor(source?: PromiseLike | Response | ReadableStream | NodeJS.ReadableStream | AsyncIterable | Iterable) { + if (source instanceof AsyncByteStream) { + this.source = (source as AsyncByteStream).source; + } else if (source instanceof AsyncByteQueue) { + this.source = new AsyncByteStreamSource(streamAdapters.fromAsyncIterable(source)); + } else if (isReadableNodeStream(source)) { + this.source = new AsyncByteStreamSource(streamAdapters.fromNodeStream(source)); + } else if (isFetchResponse(source)) { + this.source = new AsyncByteStreamSource(streamAdapters.fromDOMStream(source.body!)); + } else if (isIterable(source)) { + this.source = new AsyncByteStreamSource(streamAdapters.fromIterable(source)); + } else if (isPromise(source)) { + this.source = new AsyncByteStreamSource(streamAdapters.fromAsyncIterable(source)); + 
} else if (isAsyncIterable(source)) { + this.source = new AsyncByteStreamSource(streamAdapters.fromAsyncIterable(source)); + } else if (isReadableDOMStream(source)) { + this.source = new AsyncByteStreamSource(streamAdapters.fromDOMStream(source)); + } + } + [Symbol.asyncIterator]() { return this; } + public next(value?: any) { return this.source.next(value); } + public throw(value?: any) { return this.source.throw(value); } + public return(value?: any) { return this.source.return(value); } + public get closed(): Promise { return this.source.closed; } + public cancel(reason?: any) { return this.source.cancel(reason); } + public peek(size?: number | null) { return this.source.peek(size); } + public read(size?: number | null) { return this.source.read(size); } +} + +/** @ignore */ +interface ByteStreamSourceIterator extends IterableIterator { + next(value?: { cmd: 'peek' | 'read', size?: number | null }): IteratorResult; +} + +/** @ignore */ +interface AsyncByteStreamSourceIterator extends AsyncIterableIterator { + next(value?: { cmd: 'peek' | 'read', size?: number | null }): Promise>; +} + +/** @ignore */ +class ByteStreamSource { + constructor(protected source: ByteStreamSourceIterator) {} + public cancel(reason?: any) { this.return(reason); } + public peek(size?: number | null): T | null { return this.next(size, 'peek').value; } + public read(size?: number | null): T | null { return this.next(size, 'read').value; } + public next(size?: number | null, cmd: 'peek' | 'read' = 'read') { return this.source.next({ cmd, size }); } + public throw(value?: any) { return Object.create((this.source.throw && this.source.throw(value)) || ITERATOR_DONE); } + public return(value?: any) { return Object.create((this.source.return && this.source.return(value)) || ITERATOR_DONE); } +} + +/** @ignore */ +class AsyncByteStreamSource implements Readable { + + private _closedPromise: Promise; + private _closedPromiseResolve?: (value?: any) => void; + constructor (protected source: ByteStreamSourceIterator | AsyncByteStreamSourceIterator) { + this._closedPromise = new Promise((r) => this._closedPromiseResolve = r); + } + public async cancel(reason?: any) { await this.return(reason); } + public get closed(): Promise { return this._closedPromise; } + public async read(size?: number | null): Promise { return (await this.next(size, 'read')).value; } + public async peek(size?: number | null): Promise { return (await this.next(size, 'peek')).value; } + public async next(size?: number | null, cmd: 'peek' | 'read' = 'read') { return (await this.source.next({ cmd, size })); } + public async throw(value?: any) { + const result = (this.source.throw && await this.source.throw(value)) || ITERATOR_DONE; + this._closedPromiseResolve && this._closedPromiseResolve(); + this._closedPromiseResolve = undefined; + return Object.create(result); + } + public async return(value?: any) { + const result = (this.source.return && await this.source.return(value)) || ITERATOR_DONE; + this._closedPromiseResolve && this._closedPromiseResolve(); + this._closedPromiseResolve = undefined; + return Object.create(result); + } +} diff --git a/js/src/ipc/magic.ts b/js/src/ipc/magic.ts deleted file mode 100644 index 0688d1a2d1e19..0000000000000 --- a/js/src/ipc/magic.ts +++ /dev/null @@ -1,53 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -import { flatbuffers } from 'flatbuffers'; -import ByteBuffer = flatbuffers.ByteBuffer; - -export const PADDING = 4; -export const MAGIC_STR = 'ARROW1'; -export const MAGIC = new Uint8Array(MAGIC_STR.length); - -for (let i = 0; i < MAGIC_STR.length; i += 1 | 0) { - MAGIC[i] = MAGIC_STR.charCodeAt(i); -} - -export function checkForMagicArrowString(buffer: Uint8Array, index = 0) { - for (let i = -1, n = MAGIC.length; ++i < n;) { - if (MAGIC[i] !== buffer[index + i]) { - return false; - } - } - return true; -} - -export function isValidArrowFile(bb: ByteBuffer) { - let fileLength = bb.capacity(), footerLength: number, lengthOffset: number; - if ((fileLength < magicX2AndPadding /* Arrow buffer too small */) || - (!checkForMagicArrowString(bb.bytes(), 0) /* Missing magic start */) || - (!checkForMagicArrowString(bb.bytes(), fileLength - magicLength) /* Missing magic end */) || - (/* Invalid footer length */ - (footerLength = bb.readInt32(lengthOffset = fileLength - magicAndPadding)) < 1 && - (footerLength + lengthOffset > fileLength))) { - return false; - } - return true; -} - -export const magicLength = MAGIC.length; -export const magicAndPadding = magicLength + PADDING; -export const magicX2AndPadding = magicLength * 2 + PADDING; diff --git a/js/src/ipc/message.ts b/js/src/ipc/message.ts new file mode 100644 index 0000000000000..194e4ac7f679d --- /dev/null +++ b/js/src/ipc/message.ts @@ -0,0 +1,249 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
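Reviewer note on the `io` layer just added: `AsyncQueue` is the pivot between push and pull, since `write()` either buffers the value or resolves a reader that is already awaiting, and `ByteStream`/`AsyncByteStream` then normalize every source kind behind the same `peek`/`read` surface. A stripped-down sketch of that push/pull handoff (`TinyQueue` is hypothetical and omits the error/abort paths the real class handles):

```ts
class TinyQueue<T> implements AsyncIterable<T> {
    private values: T[] = [];
    private pending: ((r: IteratorResult<T>) => void)[] = [];
    private closed = false;
    // Push: hand the value straight to a waiting reader, else buffer it.
    public write(value: T) {
        const resolve = this.pending.shift();
        resolve ? resolve({ done: false, value }) : this.values.push(value);
    }
    public close() {
        this.closed = true;
        while (this.pending.length > 0) {
            this.pending.shift()!({ done: true, value: undefined });
        }
    }
    // Pull: drain the buffer first, then park a Promise until write()/close().
    public [Symbol.asyncIterator](): AsyncIterator<T> {
        return {
            next: (): Promise<IteratorResult<T>> =>
                this.values.length > 0 ? Promise.resolve({ done: false, value: this.values.shift()! }) :
                this.closed            ? Promise.resolve({ done: true, value: undefined }) :
                new Promise((resolve) => this.pending.push(resolve))
        };
    }
}

(async () => {
    const queue = new TinyQueue<Uint8Array>();
    setTimeout(() => { queue.write(Uint8Array.of(1, 2)); queue.close(); }, 0);
    for await (const chunk of queue) { console.log(chunk); } // Uint8Array [1, 2]
})();
```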
+ +import { MessageHeader } from '../enum'; +import { flatbuffers } from 'flatbuffers'; +import ByteBuffer = flatbuffers.ByteBuffer; +import { Message } from './metadata/message'; +import { isFileHandle } from '../util/compat'; +import { AsyncRandomAccessFile } from '../io/file'; +import { toUint8Array, ArrayBufferViewInput } from '../util/buffer'; +import { ByteStream, ReadableSource, AsyncByteStream } from '../io/stream'; +import { ArrowJSON, ArrowJSONLike, ITERATOR_DONE, FileHandle } from '../io/interfaces'; + +/** @ignore */ const invalidMessageType = (type: MessageHeader) => `Expected ${MessageHeader[type]} Message in stream, but was null or length 0.`; +/** @ignore */ const nullMessage = (type: MessageHeader) => `Header pointer of flatbuffer-encoded ${MessageHeader[type]} Message is null or length 0.`; +/** @ignore */ const invalidMessageMetadata = (expected: number, actual: number) => `Expected to read ${expected} metadata bytes, but only read ${actual}.`; +/** @ignore */ const invalidMessageBodyLength = (expected: number, actual: number) => `Expected to read ${expected} bytes for message body, but only read ${actual}.`; + +/** @ignore */ +export class MessageReader implements IterableIterator { + protected source: ByteStream; + constructor(source: ByteStream | ArrayBufferViewInput | Iterable) { + this.source = source instanceof ByteStream ? source : new ByteStream(source); + } + public [Symbol.iterator](): IterableIterator { return this as IterableIterator; } + public next(): IteratorResult { + let r; + if ((r = this.readMetadataLength()).done) { return ITERATOR_DONE; } + if ((r = this.readMetadata(r.value)).done) { return ITERATOR_DONE; } + return ( r) as IteratorResult; + } + public throw(value?: any) { return this.source.throw(value); } + public return(value?: any) { return this.source.return(value); } + public readMessage(type?: T | null) { + let r: IteratorResult>; + if ((r = this.next()).done) { return null; } + if ((type != null) && r.value.headerType !== type) { + throw new Error(invalidMessageType(type)); + } + return r.value; + } + public readMessageBody(bodyLength: number): Uint8Array { + if (bodyLength <= 0) { return new Uint8Array(0); } + const buf = toUint8Array(this.source.read(bodyLength)); + if (buf.byteLength < bodyLength) { + throw new Error(invalidMessageBodyLength(bodyLength, buf.byteLength)); + } + // 1. Work around bugs in fs.ReadStream's internal Buffer pooling, see: https://github.com/nodejs/node/issues/24817 + // 2. Work around https://github.com/whatwg/streams/blob/0ebe4b042e467d9876d80ae045de3843092ad797/reference-implementation/lib/helpers.js#L126 + return /* 1. */ (buf.byteOffset % 8 === 0) && + /* 2. */ (buf.byteOffset + buf.byteLength) <= buf.buffer.byteLength ? 
buf : buf.slice(); + } + public readSchema(throwIfNull = false) { + const type = MessageHeader.Schema; + const message = this.readMessage(type); + const schema = message && message.header(); + if (throwIfNull && !schema) { + throw new Error(nullMessage(type)); + } + return schema; + } + protected readMetadataLength(): IteratorResult { + const buf = this.source.read(PADDING); + const bb = buf && new ByteBuffer(buf); + const len = +(bb && bb.readInt32(0))!; + return { done: len <= 0, value: len }; + } + protected readMetadata(metadataLength: number): IteratorResult { + const buf = this.source.read(metadataLength); + if (!buf) { return ITERATOR_DONE; } + if (buf.byteLength < metadataLength) { + throw new Error(invalidMessageMetadata(metadataLength, buf.byteLength)); + } + return { done: false, value: Message.decode(buf) }; + } +} + +/** @ignore */ +export class AsyncMessageReader implements AsyncIterableIterator { + protected source: AsyncByteStream; + constructor(source: ReadableSource); + constructor(source: FileHandle, byteLength?: number); + constructor(source: any, byteLength?: number) { + this.source = source instanceof AsyncByteStream ? source + : isFileHandle(source) + ? new AsyncRandomAccessFile(source, byteLength!) + : new AsyncByteStream(source); + } + public [Symbol.asyncIterator](): AsyncIterableIterator { return this as AsyncIterableIterator; } + public async next(): Promise> { + let r; + if ((r = await this.readMetadataLength()).done) { return ITERATOR_DONE; } + if ((r = await this.readMetadata(r.value)).done) { return ITERATOR_DONE; } + return ( r) as IteratorResult; + } + public async throw(value?: any) { return await this.source.throw(value); } + public async return(value?: any) { return await this.source.return(value); } + public async readMessage(type?: T | null) { + let r: IteratorResult>; + if ((r = await this.next()).done) { return null; } + if ((type != null) && r.value.headerType !== type) { + throw new Error(invalidMessageType(type)); + } + return r.value; + } + public async readMessageBody(bodyLength: number): Promise { + if (bodyLength <= 0) { return new Uint8Array(0); } + const buf = toUint8Array(await this.source.read(bodyLength)); + if (buf.byteLength < bodyLength) { + throw new Error(invalidMessageBodyLength(bodyLength, buf.byteLength)); + } + // 1. Work around bugs in fs.ReadStream's internal Buffer pooling, see: https://github.com/nodejs/node/issues/24817 + // 2. Work around https://github.com/whatwg/streams/blob/0ebe4b042e467d9876d80ae045de3843092ad797/reference-implementation/lib/helpers.js#L126 + return /* 1. */ (buf.byteOffset % 8 === 0) && + /* 2. */ (buf.byteOffset + buf.byteLength) <= buf.buffer.byteLength ? 
buf : buf.slice(); + } + public async readSchema(throwIfNull = false) { + const type = MessageHeader.Schema; + const message = await this.readMessage(type); + const schema = message && message.header(); + if (throwIfNull && !schema) { + throw new Error(nullMessage(type)); + } + return schema; + } + protected async readMetadataLength(): Promise> { + const buf = await this.source.read(PADDING); + const bb = buf && new ByteBuffer(buf); + const len = +(bb && bb.readInt32(0))!; + return { done: len <= 0, value: len }; + } + protected async readMetadata(metadataLength: number): Promise> { + const buf = await this.source.read(metadataLength); + if (!buf) { return ITERATOR_DONE; } + if (buf.byteLength < metadataLength) { + throw new Error(invalidMessageMetadata(metadataLength, buf.byteLength)); + } + return { done: false, value: Message.decode(buf) }; + } +} + +/** @ignore */ +export class JSONMessageReader extends MessageReader { + private _schema = false; + private _json: ArrowJSON; + private _body: any[] = []; + private _batchIndex = 0; + private _dictionaryIndex = 0; + constructor(source: ArrowJSON | ArrowJSONLike) { + super(new Uint8Array(0)); + this._json = source instanceof ArrowJSON ? source : new ArrowJSON(source); + } + public next() { + const { _json, _batchIndex, _dictionaryIndex } = this; + const numBatches = _json.batches.length; + const numDictionaries = _json.dictionaries.length; + if (!this._schema) { + this._schema = true; + const message = Message.fromJSON(_json.schema, MessageHeader.Schema); + return { value: message, done: _batchIndex >= numBatches && _dictionaryIndex >= numDictionaries }; + } + if (_dictionaryIndex < numDictionaries) { + const batch = _json.dictionaries[this._dictionaryIndex++]; + this._body = batch['data']['columns']; + const message = Message.fromJSON(batch, MessageHeader.DictionaryBatch); + return { done: false, value: message }; + } + if (_batchIndex < numBatches) { + const batch = _json.batches[this._batchIndex++]; + this._body = batch['columns']; + const message = Message.fromJSON(batch, MessageHeader.RecordBatch); + return { done: false, value: message }; + } + this._body = []; + return ITERATOR_DONE; + } + public readMessageBody(_bodyLength?: number) { + return flattenDataSources(this._body) as any; + function flattenDataSources(xs: any[]): any[][] { + return (xs || []).reduce((buffers, column: any) => [ + ...buffers, + ...(column['VALIDITY'] && [column['VALIDITY']] || []), + ...(column['TYPE'] && [column['TYPE']] || []), + ...(column['OFFSET'] && [column['OFFSET']] || []), + ...(column['DATA'] && [column['DATA']] || []), + ...flattenDataSources(column['children']) + ], [] as any[][]); + } + } + public readMessage(type?: T | null) { + let r: IteratorResult>; + if ((r = this.next()).done) { return null; } + if ((type != null) && r.value.headerType !== type) { + throw new Error(invalidMessageType(type)); + } + return r.value; + } + public readSchema() { + const type = MessageHeader.Schema; + const message = this.readMessage(type); + const schema = message && message.header(); + if (!message || !schema) { + throw new Error(nullMessage(type)); + } + return schema; + } +} + +/** @ignore */ +export const PADDING = 4; +/** @ignore */ +export const MAGIC_STR = 'ARROW1'; +/** @ignore */ +export const MAGIC = new Uint8Array(MAGIC_STR.length); + +for (let i = 0; i < MAGIC_STR.length; i += 1 | 0) { + MAGIC[i] = MAGIC_STR.charCodeAt(i); +} + +/** @ignore */ +export function checkForMagicArrowString(buffer: Uint8Array, index = 0) { + for (let i = -1, n = 
MAGIC.length; ++i < n;) { + if (MAGIC[i] !== buffer[index + i]) { + return false; + } + } + return true; +} + +/** @ignore */ +export const magicLength = MAGIC.length; +/** @ignore */ +export const magicAndPadding = magicLength + PADDING; +/** @ignore */ +export const magicX2AndPadding = magicLength * 2 + PADDING; diff --git a/js/src/ipc/metadata.ts b/js/src/ipc/metadata.ts deleted file mode 100644 index 025b051734295..0000000000000 --- a/js/src/ipc/metadata.ts +++ /dev/null @@ -1,96 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -/* tslint:disable:class-name */ - -import { Schema, Long, MessageHeader, MetadataVersion } from '../type'; - -export class Footer { - constructor(public dictionaryBatches: FileBlock[], public recordBatches: FileBlock[], public schema: Schema) {} -} - -export class FileBlock { - public offset: number; - public bodyLength: number; - constructor(public metaDataLength: number, bodyLength: Long | number, offset: Long | number) { - this.offset = typeof offset === 'number' ? offset : offset.low; - this.bodyLength = typeof bodyLength === 'number' ? bodyLength : bodyLength.low; - } -} - -export class Message { - public bodyLength: number; - public version: MetadataVersion; - public headerType: MessageHeader; - constructor(version: MetadataVersion, bodyLength: Long | number, headerType: MessageHeader) { - this.version = version; - this.headerType = headerType; - this.bodyLength = typeof bodyLength === 'number' ? bodyLength : bodyLength.low; - } - static isSchema(m: Message): m is Schema { return m.headerType === MessageHeader.Schema; } - static isRecordBatch(m: Message): m is RecordBatchMetadata { return m.headerType === MessageHeader.RecordBatch; } - static isDictionaryBatch(m: Message): m is DictionaryBatch { return m.headerType === MessageHeader.DictionaryBatch; } -} - -export class RecordBatchMetadata extends Message { - public length: number; - public nodes: FieldMetadata[]; - public buffers: BufferMetadata[]; - constructor(version: MetadataVersion, length: Long | number, nodes: FieldMetadata[], buffers: BufferMetadata[], bodyLength?: Long | number) { - if (bodyLength === void(0)) { - bodyLength = buffers.reduce((bodyLength, buffer) => bodyLength + buffer.length, 0); - } - super(version, bodyLength, MessageHeader.RecordBatch); - this.nodes = nodes; - this.buffers = buffers; - this.length = typeof length === 'number' ? 
length : length.low; - } -} - -export class DictionaryBatch extends Message { - public id: number; - public isDelta: boolean; - public data: RecordBatchMetadata; - constructor(version: MetadataVersion, data: RecordBatchMetadata, id: Long | number, isDelta: boolean = false) { - super(version, data.bodyLength, MessageHeader.DictionaryBatch); - this.isDelta = isDelta; - this.data = data; - this.id = typeof id === 'number' ? id : id.low; - } - private static atomicDictionaryId = 0; - public static getId() { return DictionaryBatch.atomicDictionaryId++; } - public get nodes(): FieldMetadata[] { return this.data.nodes; } - public get buffers(): BufferMetadata[] { return this.data.buffers; } -} - -export class BufferMetadata { - public offset: number; - public length: number; - constructor(offset: Long | number, length: Long | number) { - this.offset = typeof offset === 'number' ? offset : offset.low; - this.length = typeof length === 'number' ? length : length.low; - } -} - -export class FieldMetadata { - public length: number; - public nullCount: number; - constructor(length: Long | number, nullCount: Long | number) { - this.length = typeof length === 'number' ? length : length.low; - this.nullCount = typeof nullCount === 'number' ? nullCount : nullCount.low; - } -} diff --git a/js/src/ipc/metadata/file.ts b/js/src/ipc/metadata/file.ts new file mode 100644 index 0000000000000..d7786fbbf9324 --- /dev/null +++ b/js/src/ipc/metadata/file.ts @@ -0,0 +1,163 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
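For reference while reviewing `readMetadataLength`/`readMetadata`/`readMessageBody` above: on the wire, each message is a 4-byte little-endian metadata length (the `PADDING` constant), the flatbuffer-encoded metadata, then `bodyLength` bytes of body. A hedged sketch that only splits out the metadata frames, assuming messages with empty bodies such as a lone Schema message; a real reader decodes each frame with `Message.decode` and consumes `bodyLength` body bytes before the next frame:

```ts
// Split metadata frames out of a byte buffer: [int32 length][metadata]...
function* metadataFrames(buf: Uint8Array): IterableIterator<Uint8Array> {
    const view = new DataView(buf.buffer, buf.byteOffset, buf.byteLength);
    let offset = 0;
    while (offset + 4 <= buf.byteLength) {
        const metadataLength = view.getInt32(offset, true); // little-endian
        if (metadataLength <= 0) { break; } // zero length signals end-of-stream
        yield buf.subarray(offset + 4, offset + 4 + metadataLength);
        offset += 4 + metadataLength;
    }
}
```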
+ +/* tslint:disable:class-name */ + +import * as File_ from '../../fb/File'; +import { flatbuffers } from 'flatbuffers'; + +import Long = flatbuffers.Long; +import Builder = flatbuffers.Builder; +import ByteBuffer = flatbuffers.ByteBuffer; +import _Block = File_.org.apache.arrow.flatbuf.Block; +import _Footer = File_.org.apache.arrow.flatbuf.Footer; + +import { Schema } from '../../schema'; +import { MetadataVersion } from '../../enum'; +import { toUint8Array } from '../../util/buffer'; +import { ArrayBufferViewInput } from '../../util/buffer'; + +/** @ignore */ +class Footer_ { + + /** @nocollapse */ + public static decode(buf: ArrayBufferViewInput) { + buf = new ByteBuffer(toUint8Array(buf)); + const footer = _Footer.getRootAsFooter(buf); + const schema = Schema.decode(footer.schema()!); + return new OffHeapFooter(schema, footer) as Footer_; + } + + /** @nocollapse */ + public static encode(footer: Footer_) { + + const b: Builder = new Builder(); + const schemaOffset = Schema.encode(b, footer.schema); + + _Footer.startRecordBatchesVector(b, footer.numRecordBatches); + [...footer.recordBatches()].slice().reverse().forEach((rb) => FileBlock.encode(b, rb)); + const recordBatchesOffset = b.endVector(); + + _Footer.startDictionariesVector(b, footer.numDictionaries); + [...footer.dictionaryBatches()].slice().reverse().forEach((db) => FileBlock.encode(b, db)); + + const dictionaryBatchesOffset = b.endVector(); + + _Footer.startFooter(b); + _Footer.addSchema(b, schemaOffset); + _Footer.addVersion(b, MetadataVersion.V4); + _Footer.addRecordBatches(b, recordBatchesOffset); + _Footer.addDictionaries(b, dictionaryBatchesOffset); + _Footer.finishFooterBuffer(b, _Footer.endFooter(b)); + + return b.asUint8Array(); + } + + // @ts-ignore + protected _recordBatches: FileBlock[]; + // @ts-ignore + protected _dictionaryBatches: FileBlock[]; + public get numRecordBatches() { return this._recordBatches.length; } + public get numDictionaries() { return this._dictionaryBatches.length; } + + constructor(public schema: Schema, + public version: MetadataVersion = MetadataVersion.V4, + recordBatches?: FileBlock[], dictionaryBatches?: FileBlock[]) { + recordBatches && (this._recordBatches = recordBatches); + dictionaryBatches && (this._dictionaryBatches = dictionaryBatches); + } + + public *recordBatches(): Iterable<FileBlock> { + for (let block, i = -1, n = this.numRecordBatches; ++i < n;) { + if (block = this.getRecordBatch(i)) { yield block; } + } + } + + public *dictionaryBatches(): Iterable<FileBlock> { + for (let block, i = -1, n = this.numDictionaries; ++i < n;) { + if (block = this.getDictionaryBatch(i)) { yield block; } + } + } + + public getRecordBatch(index: number) { + return index >= 0 + && index < this.numRecordBatches + && this._recordBatches[index] || null; + } + + public getDictionaryBatch(index: number) { + return index >= 0 + && index < this.numDictionaries + && this._dictionaryBatches[index] || null; + } +} + +export { Footer_ as Footer }; + +/** @ignore */ +class OffHeapFooter extends Footer_ { + + public get numRecordBatches() { return this._footer.recordBatchesLength(); } + public get numDictionaries() { return this._footer.dictionariesLength(); } + + constructor(schema: Schema, protected _footer: _Footer) { + super(schema, _footer.version()); + } + + public getRecordBatch(index: number) { + if (index >= 0 && index < this.numRecordBatches) { + const fileBlock = this._footer.recordBatches(index); + if (fileBlock) { return FileBlock.decode(fileBlock); } + } + return null; + } + + public getDictionaryBatch(index:
number) { + if (index >= 0 && index < this.numDictionaries) { + const fileBlock = this._footer.dictionaries(index); + if (fileBlock) { return FileBlock.decode(fileBlock); } + } + return null; + } +} + +/** @ignore */ +export class FileBlock { + + /** @nocollapse */ + public static decode(block: _Block) { + return new FileBlock(block.metaDataLength(), block.bodyLength(), block.offset()); + } + + /** @nocollapse */ + public static encode(b: Builder, fileBlock: FileBlock) { + const { metaDataLength } = fileBlock; + const offset = new Long(fileBlock.offset, 0); + const bodyLength = new Long(fileBlock.bodyLength, 0); + return _Block.createBlock(b, offset, metaDataLength, bodyLength); + } + + public offset: number; + public bodyLength: number; + public metaDataLength: number; + + constructor(metaDataLength: number, bodyLength: Long | number, offset: Long | number) { + this.metaDataLength = metaDataLength; + this.offset = typeof offset === 'number' ? offset : offset.low; + this.bodyLength = typeof bodyLength === 'number' ? bodyLength : bodyLength.low; + } +} diff --git a/js/src/ipc/metadata/json.ts b/js/src/ipc/metadata/json.ts new file mode 100644 index 0000000000000..fa219b3e7853b --- /dev/null +++ b/js/src/ipc/metadata/json.ts @@ -0,0 +1,208 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
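As a rough usage sketch of the `Footer`/`FileBlock` pair defined in `metadata/file.ts` above, written as if it lived inside `js/src/ipc`. The `Schema`/`Field` constructor arguments are assumptions from how this diff calls them, and note that `Schema.encode`/`Schema.decode` are only installed once `metadata/message.ts` (further below) has been loaded:

```ts
import './metadata/message';  // side effect: installs Schema.encode/decode
import { Schema, Field } from '../schema';
import { Int32 } from '../type';
import { MetadataVersion } from '../enum';
import { Footer, FileBlock } from './metadata/file';

// One record batch: a 128-byte metadata prefix and a 256-byte body,
// starting at byte offset 8 (just past the ARROW1 magic and padding).
const schema = new Schema([new Field('id', new Int32())]);
const footer = new Footer(schema, MetadataVersion.V4, [new FileBlock(128, 256, 8)], []);

// encode() finishes the flatbuffer and returns its bytes; decode() returns
// a Footer view backed by those bytes (the OffHeapFooter subclass above).
const bytes = Footer.encode(footer);
const decoded = Footer.decode(bytes);
console.log(decoded.numRecordBatches); // 1
```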
+ +import { Schema, Field } from '../../schema'; +import { + DataType, Dictionary, TimeBitWidth, + Utf8, Binary, Decimal, FixedSizeBinary, + List, FixedSizeList, Map_, Struct, Union, + Bool, Null, Int, Float, Date_, Time, Interval, Timestamp, IntBitWidth, Int32, TKeys, +} from '../../type'; + +import { DictionaryBatch, RecordBatch, FieldNode, BufferRegion } from './message'; +import { TimeUnit, Precision, IntervalUnit, UnionMode, DateUnit } from '../../enum'; + +/** @ignore */ +export function schemaFromJSON(_schema: any, dictionaries: Map = new Map(), dictionaryFields: Map[]> = new Map()) { + return new Schema( + schemaFieldsFromJSON(_schema, dictionaries, dictionaryFields), + customMetadataFromJSON(_schema['customMetadata']), + dictionaries, dictionaryFields + ); +} + +/** @ignore */ +export function recordBatchFromJSON(b: any) { + return new RecordBatch( + b['count'], + fieldNodesFromJSON(b['columns']), + buffersFromJSON(b['columns']) + ); +} + +/** @ignore */ +export function dictionaryBatchFromJSON(b: any) { + return new DictionaryBatch( + recordBatchFromJSON(b['data']), + b['id'], b['isDelta'] + ); +} + +/** @ignore */ +function schemaFieldsFromJSON(_schema: any, dictionaries?: Map, dictionaryFields?: Map[]>) { + return (_schema['fields'] || []).filter(Boolean).map((f: any) => Field.fromJSON(f, dictionaries, dictionaryFields)); +} + +/** @ignore */ +function fieldChildrenFromJSON(_field: any, dictionaries?: Map, dictionaryFields?: Map[]>): Field[] { + return (_field['children'] || []).filter(Boolean).map((f: any) => Field.fromJSON(f, dictionaries, dictionaryFields)); +} + +/** @ignore */ +function fieldNodesFromJSON(xs: any[]): FieldNode[] { + return (xs || []).reduce((fieldNodes, column: any) => [ + ...fieldNodes, + new FieldNode( + column['count'], + nullCountFromJSON(column['VALIDITY']) + ), + ...fieldNodesFromJSON(column['children']) + ], [] as FieldNode[]); +} + +/** @ignore */ +function buffersFromJSON(xs: any[], buffers: BufferRegion[] = []): BufferRegion[] { + for (let i = -1, n = (xs || []).length; ++i < n;) { + const column = xs[i]; + column['VALIDITY'] && buffers.push(new BufferRegion(buffers.length, column['VALIDITY'].length)); + column['TYPE'] && buffers.push(new BufferRegion(buffers.length, column['TYPE'].length)); + column['OFFSET'] && buffers.push(new BufferRegion(buffers.length, column['OFFSET'].length)); + column['DATA'] && buffers.push(new BufferRegion(buffers.length, column['DATA'].length)); + buffers = buffersFromJSON(column['children'], buffers); + } + return buffers; +} + +/** @ignore */ +function nullCountFromJSON(validity: number[]) { + return (validity || []).reduce((sum, val) => sum + +(val === 0), 0); +} + +/** @ignore */ +export function fieldFromJSON(_field: any, dictionaries?: Map, dictionaryFields?: Map[]>) { + + let id: number; + let keys: TKeys | null; + let field: Field | void; + let dictMeta: any; + let type: DataType; + let dictType: Dictionary; + let dictField: Field; + + // If no dictionary encoding, or in the process of decoding the children of a dictionary-encoded field + if (!dictionaries || !dictionaryFields || !(dictMeta = _field['dictionary'])) { + type = typeFromJSON(_field, fieldChildrenFromJSON(_field, dictionaries, dictionaryFields)); + field = new Field(_field['name'], type, _field['nullable'], customMetadataFromJSON(_field['customMetadata'])); + } + // tslint:disable + // If dictionary encoded and the first time we've seen this dictionary id, decode + // the data type and child fields, then wrap in a Dictionary type and insert the + 
// data type into the dictionary types map. + else if (!dictionaries.has(id = dictMeta['id'])) { + // a dictionary index defaults to signed 32 bit int if unspecified + keys = (keys = dictMeta['indexType']) ? indexTypeFromJSON(keys) as TKeys : new Int32(); + dictionaries.set(id, type = typeFromJSON(_field, fieldChildrenFromJSON(_field))); + dictType = new Dictionary(type, keys, id, dictMeta['isOrdered']); + dictField = new Field(_field['name'], dictType, _field['nullable'], customMetadataFromJSON(_field['customMetadata'])); + dictionaryFields.set(id, [field = dictField]); + } + // If dictionary encoded, and we have already seen this dictionary id in the schema, then reuse the + // data type and wrap in a new Dictionary type and field. + else { + // a dictionary index defaults to signed 32 bit int if unspecified + keys = (keys = dictMeta['indexType']) ? indexTypeFromJSON(keys) as TKeys : new Int32(); + dictType = new Dictionary(dictionaries.get(id)!, keys, id, dictMeta['isOrdered']); + dictField = new Field(_field['name'], dictType, _field['nullable'], customMetadataFromJSON(_field['customMetadata'])); + dictionaryFields.get(id)!.push(field = dictField); + } + return field || null; +} + +/** @ignore */ +function customMetadataFromJSON(_metadata?: object) { + return new Map(Object.entries(_metadata || {})); +} + +/** @ignore */ +function indexTypeFromJSON(_type: any) { + return new Int(_type['isSigned'], _type['bitWidth']); +} + +/** @ignore */ +function typeFromJSON(f: any, children?: Field[]): DataType { + + const typeId = f['type']['name']; + + switch (typeId) { + case 'NONE': return new DataType(); + case 'null': return new Null(); + case 'binary': return new Binary(); + case 'utf8': return new Utf8(); + case 'bool': return new Bool(); + case 'list': return new List((children || [])[0]); + case 'struct': return new Struct(children || []); + case 'struct_': return new Struct(children || []); + } + + switch (typeId) { + case 'int': { + const t = f['type']; + return new Int(t['isSigned'], t['bitWidth'] as IntBitWidth); + } + case 'floatingpoint': { + const t = f['type']; + return new Float(Precision[t['precision']] as any); + } + case 'decimal': { + const t = f['type']; + return new Decimal(t['scale'], t['precision']); + } + case 'date': { + const t = f['type']; + return new Date_(DateUnit[t['unit']] as any); + } + case 'time': { + const t = f['type']; + return new Time(TimeUnit[t['unit']] as any, t['bitWidth'] as TimeBitWidth); + } + case 'timestamp': { + const t = f['type']; + return new Timestamp(TimeUnit[t['unit']] as any, t['timezone']); + } + case 'interval': { + const t = f['type']; + return new Interval(IntervalUnit[t['unit']] as any); + } + case 'union': { + const t = f['type']; + return new Union(UnionMode[t['mode']] as any, (t['typeIds'] || []), children || []); + } + case 'fixedsizebinary': { + const t = f['type']; + return new FixedSizeBinary(t['byteWidth']); + } + case 'fixedsizelist': { + const t = f['type']; + return new FixedSizeList(t['listSize'], (children || [])[0]); + } + case 'map': { + const t = f['type']; + return new Map_(children || [], t['keysSorted']); + } + } + throw new Error(`Unrecognized type: "${typeId}"`); +} diff --git a/js/src/ipc/metadata/message.ts b/js/src/ipc/metadata/message.ts new file mode 100644 index 0000000000000..d1ab219cd943f --- /dev/null +++ b/js/src/ipc/metadata/message.ts @@ -0,0 +1,595 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements.
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import { flatbuffers } from 'flatbuffers'; +import * as Schema_ from '../../fb/Schema'; +import * as Message_ from '../../fb/Message'; + +import { Schema, Field } from '../../schema'; +import { toUint8Array } from '../../util/buffer'; +import { ArrayBufferViewInput } from '../../util/buffer'; +import { MessageHeader, MetadataVersion } from '../../enum'; +import { instance as typeAssembler } from '../../visitor/typeassembler'; +import { fieldFromJSON, schemaFromJSON, recordBatchFromJSON, dictionaryBatchFromJSON } from './json'; + +import Long = flatbuffers.Long; +import Builder = flatbuffers.Builder; +import ByteBuffer = flatbuffers.ByteBuffer; +import _Int = Schema_.org.apache.arrow.flatbuf.Int; +import Type = Schema_.org.apache.arrow.flatbuf.Type; +import _Field = Schema_.org.apache.arrow.flatbuf.Field; +import _Schema = Schema_.org.apache.arrow.flatbuf.Schema; +import _Buffer = Schema_.org.apache.arrow.flatbuf.Buffer; +import _Message = Message_.org.apache.arrow.flatbuf.Message; +import _KeyValue = Schema_.org.apache.arrow.flatbuf.KeyValue; +import _FieldNode = Message_.org.apache.arrow.flatbuf.FieldNode; +import _Endianness = Schema_.org.apache.arrow.flatbuf.Endianness; +import _RecordBatch = Message_.org.apache.arrow.flatbuf.RecordBatch; +import _DictionaryBatch = Message_.org.apache.arrow.flatbuf.DictionaryBatch; +import _DictionaryEncoding = Schema_.org.apache.arrow.flatbuf.DictionaryEncoding; + +import { + DataType, Dictionary, TimeBitWidth, + Utf8, Binary, Decimal, FixedSizeBinary, + List, FixedSizeList, Map_, Struct, Union, + Bool, Null, Int, Float, Date_, Time, Interval, Timestamp, IntBitWidth, Int32, TKeys, +} from '../../type'; + +/** @ignore */ +export class Message { + + /** @nocollapse */ + public static fromJSON(msg: any, headerType: T): Message { + const message = new Message(0, MetadataVersion.V4, headerType); + message._createHeader = messageHeaderFromJSON(msg, headerType); + return message; + } + + /** @nocollapse */ + public static decode(buf: ArrayBufferViewInput) { + buf = new ByteBuffer(toUint8Array(buf)); + const _message = _Message.getRootAsMessage(buf); + const bodyLength: Long = _message.bodyLength()!; + const version: MetadataVersion = _message.version(); + const headerType: MessageHeader = _message.headerType(); + const message = new Message(bodyLength, version, headerType); + message._createHeader = decodeMessageHeader(_message, headerType); + return message; + } + + /** @nocollapse */ + public static encode(message: Message) { + let b = new Builder(), headerOffset = -1; + if (message.isSchema()) { + headerOffset = Schema.encode(b, message.header() as Schema); + } else if (message.isRecordBatch()) { + headerOffset = RecordBatch.encode(b, message.header() as RecordBatch); + } else if (message.isDictionaryBatch()) { + headerOffset = DictionaryBatch.encode(b, 
message.header() as DictionaryBatch); + } + _Message.startMessage(b); + _Message.addVersion(b, MetadataVersion.V4); + _Message.addHeader(b, headerOffset); + _Message.addHeaderType(b, message.headerType); + _Message.addBodyLength(b, new Long(message.bodyLength, 0)); + _Message.finishMessageBuffer(b, _Message.endMessage(b)); + return b.asUint8Array(); + } + + /** @nocollapse */ + public static from(header: Schema | RecordBatch | DictionaryBatch, bodyLength = 0) { + if (header instanceof Schema) { + return new Message(0, MetadataVersion.V4, MessageHeader.Schema, header); + } + if (header instanceof RecordBatch) { + return new Message(bodyLength, MetadataVersion.V4, MessageHeader.RecordBatch, header); + } + if (header instanceof DictionaryBatch) { + return new Message(bodyLength, MetadataVersion.V4, MessageHeader.DictionaryBatch, header); + } + throw new Error(`Unrecognized Message header: ${header}`); + } + + // @ts-ignore + public body: Uint8Array; + protected _headerType: T; + protected _bodyLength: number; + protected _version: MetadataVersion; + public get type() { return this.headerType; } + public get version() { return this._version; } + public get headerType() { return this._headerType; } + public get bodyLength() { return this._bodyLength; } + // @ts-ignore + protected _createHeader: MessageHeaderDecoder; + public header() { return this._createHeader(); } + public isSchema(): this is Message { return this.headerType === MessageHeader.Schema; } + public isRecordBatch(): this is Message { return this.headerType === MessageHeader.RecordBatch; } + public isDictionaryBatch(): this is Message { return this.headerType === MessageHeader.DictionaryBatch; } + + constructor(bodyLength: Long | number, version: MetadataVersion, headerType: T, header?: any) { + this._version = version; + this._headerType = headerType; + this.body = new Uint8Array(0); + header && (this._createHeader = () => header); + this._bodyLength = typeof bodyLength === 'number' ? bodyLength : bodyLength.low; + } +} + +/** @ignore */ +export class RecordBatch { + protected _length: number; + protected _nodes: FieldNode[]; + protected _buffers: BufferRegion[]; + public get nodes() { return this._nodes; } + public get length() { return this._length; } + public get buffers() { return this._buffers; } + constructor(length: Long | number, nodes: FieldNode[], buffers: BufferRegion[]) { + this._nodes = nodes; + this._buffers = buffers; + this._length = typeof length === 'number' ? length : length.low; + } +} + +/** @ignore */ +export class DictionaryBatch { + + protected _id: number; + protected _isDelta: boolean; + protected _data: RecordBatch; + public get id() { return this._id; } + public get data() { return this._data; } + public get isDelta() { return this._isDelta; } + public get length(): number { return this.data.length; } + public get nodes(): FieldNode[] { return this.data.nodes; } + public get buffers(): BufferRegion[] { return this.data.buffers; } + + constructor(data: RecordBatch, id: Long | number, isDelta: boolean = false) { + this._data = data; + this._isDelta = isDelta; + this._id = typeof id === 'number' ? id : id.low; + } +} + +/** @ignore */ +export class BufferRegion { + public offset: number; + public length: number; + constructor(offset: Long | number, length: Long | number) { + this.offset = typeof offset === 'number' ? offset : offset.low; + this.length = typeof length === 'number' ? 
length : length.low; + } +} + +/** @ignore */ +export class FieldNode { + public length: number; + public nullCount: number; + constructor(length: Long | number, nullCount: Long | number) { + this.length = typeof length === 'number' ? length : length.low; + this.nullCount = typeof nullCount === 'number' ? nullCount : nullCount.low; + } +} + +/** @ignore */ +function messageHeaderFromJSON(message: any, type: MessageHeader) { + return (() => { + switch (type) { + case MessageHeader.Schema: return Schema.fromJSON(message); + case MessageHeader.RecordBatch: return RecordBatch.fromJSON(message); + case MessageHeader.DictionaryBatch: return DictionaryBatch.fromJSON(message); + } + throw new Error(`Unrecognized Message type: { name: ${MessageHeader[type]}, type: ${type} }`); + }) as MessageHeaderDecoder; +} + +/** @ignore */ +function decodeMessageHeader(message: _Message, type: MessageHeader) { + return (() => { + switch (type) { + case MessageHeader.Schema: return Schema.decode(message.header(new _Schema())!); + case MessageHeader.RecordBatch: return RecordBatch.decode(message.header(new _RecordBatch())!, message.version()); + case MessageHeader.DictionaryBatch: return DictionaryBatch.decode(message.header(new _DictionaryBatch())!, message.version()); + } + throw new Error(`Unrecognized Message type: { name: ${MessageHeader[type]}, type: ${type} }`); + }) as MessageHeaderDecoder; +} + +Field['encode'] = encodeField; +Field['decode'] = decodeField; +Field['fromJSON'] = fieldFromJSON; + +Schema['encode'] = encodeSchema; +Schema['decode'] = decodeSchema; +Schema['fromJSON'] = schemaFromJSON; + +RecordBatch['encode'] = encodeRecordBatch; +RecordBatch['decode'] = decodeRecordBatch; +RecordBatch['fromJSON'] = recordBatchFromJSON; + +DictionaryBatch['encode'] = encodeDictionaryBatch; +DictionaryBatch['decode'] = decodeDictionaryBatch; +DictionaryBatch['fromJSON'] = dictionaryBatchFromJSON; + +FieldNode['encode'] = encodeFieldNode; +FieldNode['decode'] = decodeFieldNode; + +BufferRegion['encode'] = encodeBufferRegion; +BufferRegion['decode'] = decodeBufferRegion; + +declare module '../../schema' { + namespace Field { + export { encodeField as encode }; + export { decodeField as decode }; + export { fieldFromJSON as fromJSON }; + } + namespace Schema { + export { encodeSchema as encode }; + export { decodeSchema as decode }; + export { schemaFromJSON as fromJSON }; + } +} + +declare module './message' { + namespace RecordBatch { + export { encodeRecordBatch as encode }; + export { decodeRecordBatch as decode }; + export { recordBatchFromJSON as fromJSON }; + } + namespace DictionaryBatch { + export { encodeDictionaryBatch as encode }; + export { decodeDictionaryBatch as decode }; + export { dictionaryBatchFromJSON as fromJSON }; + } + namespace FieldNode { + export { encodeFieldNode as encode }; + export { decodeFieldNode as decode }; + } + namespace BufferRegion { + export { encodeBufferRegion as encode }; + export { decodeBufferRegion as decode }; + } +} + +/** @ignore */ +function decodeSchema(_schema: _Schema, dictionaries: Map = new Map(), dictionaryFields: Map[]> = new Map()) { + const fields = decodeSchemaFields(_schema, dictionaries, dictionaryFields); + return new Schema(fields, decodeCustomMetadata(_schema), dictionaries, dictionaryFields); +} + +/** @ignore */ +function decodeRecordBatch(batch: _RecordBatch, version = MetadataVersion.V4) { + return new RecordBatch(batch.length(), decodeFieldNodes(batch), decodeBuffers(batch, version)); +} + +/** @ignore */ +function 
decodeDictionaryBatch(batch: _DictionaryBatch, version = MetadataVersion.V4) { + return new DictionaryBatch(RecordBatch.decode(batch.data()!, version), batch.id(), batch.isDelta()); +} + +/** @ignore */ +function decodeBufferRegion(b: _Buffer) { + return new BufferRegion(b.offset(), b.length()); +} + +/** @ignore */ +function decodeFieldNode(f: _FieldNode) { + return new FieldNode(f.length(), f.nullCount()); +} + +/** @ignore */ +function decodeFieldNodes(batch: _RecordBatch) { + const nodes = [] as FieldNode[]; + for (let f, i = -1, j = -1, n = batch.nodesLength(); ++i < n;) { + if (f = batch.nodes(i)) { + nodes[++j] = FieldNode.decode(f); + } + } + return nodes; +} + +/** @ignore */ +function decodeBuffers(batch: _RecordBatch, version: MetadataVersion) { + const bufferRegions = [] as BufferRegion[]; + for (let b, i = -1, j = -1, n = batch.buffersLength(); ++i < n;) { + if (b = batch.buffers(i)) { + // If this Arrow buffer was written before version 4, + // advance the buffer's bb_pos 8 bytes to skip past + // the now-removed page_id field + if (version < MetadataVersion.V4) { + b.bb_pos += (8 * (i + 1)); + } + bufferRegions[++j] = BufferRegion.decode(b); + } + } + return bufferRegions; +} + +/** @ignore */ +function decodeSchemaFields(schema: _Schema, dictionaries?: Map, dictionaryFields?: Map[]>) { + const fields = [] as Field[]; + for (let f, i = -1, j = -1, n = schema.fieldsLength(); ++i < n;) { + if (f = schema.fields(i)) { + fields[++j] = Field.decode(f, dictionaries, dictionaryFields); + } + } + return fields; +} + +/** @ignore */ +function decodeFieldChildren(field: _Field, dictionaries?: Map, dictionaryFields?: Map[]>): Field[] { + const children = [] as Field[]; + for (let f, i = -1, j = -1, n = field.childrenLength(); ++i < n;) { + if (f = field.children(i)) { + children[++j] = Field.decode(f, dictionaries, dictionaryFields); + } + } + return children; +} + +/** @ignore */ +function decodeField(f: _Field, dictionaries?: Map, dictionaryFields?: Map[]>) { + + let id: number; + let field: Field | void; + let type: DataType; + let keys: _Int | TKeys | null; + let dictType: Dictionary; + let dictMeta: _DictionaryEncoding | null; + let dictField: Field; + + // If no dictionary encoding, or in the process of decoding the children of a dictionary-encoded field + if (!dictionaries || !dictionaryFields || !(dictMeta = f.dictionary())) { + type = decodeFieldType(f, decodeFieldChildren(f, dictionaries, dictionaryFields)); + field = new Field(f.name()!, type, f.nullable(), decodeCustomMetadata(f)); + } + // tslint:disable + // If dictionary encoded and the first time we've seen this dictionary id, decode + // the data type and child fields, then wrap in a Dictionary type and insert the + // data type into the dictionary types map. + else if (!dictionaries.has(id = dictMeta.id().low)) { + // a dictionary index defaults to signed 32 bit int if unspecified + keys = (keys = dictMeta.indexType()) ? decodeIndexType(keys) as TKeys : new Int32(); + dictionaries.set(id, type = decodeFieldType(f, decodeFieldChildren(f))); + dictType = new Dictionary(type, keys, id, dictMeta.isOrdered()); + dictField = new Field(f.name()!, dictType, f.nullable(), decodeCustomMetadata(f)); + dictionaryFields.set(id, [field = dictField]); + } + // If dictionary encoded, and we have already seen this dictionary id in the schema, then reuse the + // data type and wrap in a new Dictionary type and field. + else { + // a dictionary index defaults to signed 32 bit int if unspecified + keys = (keys = dictMeta.indexType()) ?
decodeIndexType(keys) as TKeys : new Int32(); + dictType = new Dictionary(dictionaries.get(id)!, keys, id, dictMeta.isOrdered()); + dictField = new Field(f.name()!, dictType, f.nullable(), decodeCustomMetadata(f)); + dictionaryFields.get(id)!.push(field = dictField); + } + return field || null; +} + +/** @ignore */ +function decodeCustomMetadata(parent?: _Schema | _Field | null) { + const data = new Map(); + if (parent) { + for (let entry, key, i = -1, n = parent.customMetadataLength() | 0; ++i < n;) { + if ((entry = parent.customMetadata(i)) && (key = entry.key()) != null) { + data.set(key, entry.value()!); + } + } + } + return data; +} + +/** @ignore */ +function decodeIndexType(_type: _Int) { + return new Int(_type.isSigned(), _type.bitWidth() as IntBitWidth); +} + +/** @ignore */ +function decodeFieldType(f: _Field, children?: Field[]): DataType { + + const typeId = f.typeType(); + + switch (typeId) { + case Type.NONE: return new DataType(); + case Type.Null: return new Null(); + case Type.Binary: return new Binary(); + case Type.Utf8: return new Utf8(); + case Type.Bool: return new Bool(); + case Type.List: return new List((children || [])[0]); + case Type.Struct_: return new Struct(children || []); + } + + switch (typeId) { + case Type.Int: { + const t = f.type(new Schema_.org.apache.arrow.flatbuf.Int())!; + return new Int(t.isSigned(), t.bitWidth()); + } + case Type.FloatingPoint: { + const t = f.type(new Schema_.org.apache.arrow.flatbuf.FloatingPoint())!; + return new Float(t.precision()); + } + case Type.Decimal: { + const t = f.type(new Schema_.org.apache.arrow.flatbuf.Decimal())!; + return new Decimal(t.scale(), t.precision()); + } + case Type.Date: { + const t = f.type(new Schema_.org.apache.arrow.flatbuf.Date())!; + return new Date_(t.unit()); + } + case Type.Time: { + const t = f.type(new Schema_.org.apache.arrow.flatbuf.Time())!; + return new Time(t.unit(), t.bitWidth() as TimeBitWidth); + } + case Type.Timestamp: { + const t = f.type(new Schema_.org.apache.arrow.flatbuf.Timestamp())!; + return new Timestamp(t.unit(), t.timezone()); + } + case Type.Interval: { + const t = f.type(new Schema_.org.apache.arrow.flatbuf.Interval())!; + return new Interval(t.unit()); + } + case Type.Union: { + const t = f.type(new Schema_.org.apache.arrow.flatbuf.Union())!; + return new Union(t.mode(), t.typeIdsArray() || [], children || []); + } + case Type.FixedSizeBinary: { + const t = f.type(new Schema_.org.apache.arrow.flatbuf.FixedSizeBinary())!; + return new FixedSizeBinary(t.byteWidth()); + } + case Type.FixedSizeList: { + const t = f.type(new Schema_.org.apache.arrow.flatbuf.FixedSizeList())!; + return new FixedSizeList(t.listSize(), (children || [])[0]); + } + case Type.Map: { + const t = f.type(new Schema_.org.apache.arrow.flatbuf.Map())!; + return new Map_(children || [], t.keysSorted()); + } + } + throw new Error(`Unrecognized type: "${Type[typeId]}" (${typeId})`); +} + +/** @ignore */ +function encodeSchema(b: Builder, schema: Schema) { + + const fieldOffsets = schema.fields.map((f) => Field.encode(b, f)); + + _Schema.startFieldsVector(b, fieldOffsets.length); + + const fieldsVectorOffset = _Schema.createFieldsVector(b, fieldOffsets); + + const metadataOffset = !(schema.metadata && schema.metadata.size > 0) ? 
-1 : + _Schema.createCustomMetadataVector(b, [...schema.metadata].map(([k, v]) => { + const key = b.createString(`${k}`); + const val = b.createString(`${v}`); + _KeyValue.startKeyValue(b); + _KeyValue.addKey(b, key); + _KeyValue.addValue(b, val); + return _KeyValue.endKeyValue(b); + })); + + _Schema.startSchema(b); + _Schema.addFields(b, fieldsVectorOffset); + _Schema.addEndianness(b, platformIsLittleEndian ? _Endianness.Little : _Endianness.Big); + + if (metadataOffset !== -1) { _Schema.addCustomMetadata(b, metadataOffset); } + + return _Schema.endSchema(b); +} + +/** @ignore */ +function encodeField(b: Builder, field: Field) { + + let nameOffset = -1; + let typeOffset = -1; + let dictionaryOffset = -1; + + let type = field.type; + let typeId: Type = field.typeId; + + if (!DataType.isDictionary(type)) { + typeOffset = typeAssembler.visit(type, b)!; + } else { + typeId = type.dictionary.typeId; + dictionaryOffset = typeAssembler.visit(type, b)!; + typeOffset = typeAssembler.visit(type.dictionary, b)!; + } + + const childOffsets = (type.children || []).map((f: Field) => Field.encode(b, f)); + const childrenVectorOffset = _Field.createChildrenVector(b, childOffsets); + + const metadataOffset = !(field.metadata && field.metadata.size > 0) ? -1 : + _Field.createCustomMetadataVector(b, [...field.metadata].map(([k, v]) => { + const key = b.createString(`${k}`); + const val = b.createString(`${v}`); + _KeyValue.startKeyValue(b); + _KeyValue.addKey(b, key); + _KeyValue.addValue(b, val); + return _KeyValue.endKeyValue(b); + })); + + if (field.name) { + nameOffset = b.createString(field.name); + } + + _Field.startField(b); + _Field.addType(b, typeOffset); + _Field.addTypeType(b, typeId); + _Field.addChildren(b, childrenVectorOffset); + _Field.addNullable(b, !!field.nullable); + + if (nameOffset !== -1) { _Field.addName(b, nameOffset); } + if (dictionaryOffset !== -1) { _Field.addDictionary(b, dictionaryOffset); } + if (metadataOffset !== -1) { _Field.addCustomMetadata(b, metadataOffset); } + + return _Field.endField(b); +} + +/** @ignore */ +function encodeRecordBatch(b: Builder, recordBatch: RecordBatch) { + + const nodes = recordBatch.nodes || []; + const buffers = recordBatch.buffers || []; + + _RecordBatch.startNodesVector(b, nodes.length); + nodes.slice().reverse().forEach((n) => FieldNode.encode(b, n)); + + const nodesVectorOffset = b.endVector(); + + _RecordBatch.startBuffersVector(b, buffers.length); + buffers.slice().reverse().forEach((b_) => BufferRegion.encode(b, b_)); + + const buffersVectorOffset = b.endVector(); + + _RecordBatch.startRecordBatch(b); + _RecordBatch.addLength(b, new Long(recordBatch.length, 0)); + _RecordBatch.addNodes(b, nodesVectorOffset); + _RecordBatch.addBuffers(b, buffersVectorOffset); + return _RecordBatch.endRecordBatch(b); +} + +/** @ignore */ +function encodeDictionaryBatch(b: Builder, dictionaryBatch: DictionaryBatch) { + const dataOffset = RecordBatch.encode(b, dictionaryBatch.data); + _DictionaryBatch.startDictionaryBatch(b); + _DictionaryBatch.addId(b, new Long(dictionaryBatch.id, 0)); + _DictionaryBatch.addIsDelta(b, dictionaryBatch.isDelta); + _DictionaryBatch.addData(b, dataOffset); + return _DictionaryBatch.endDictionaryBatch(b); +} + +/** @ignore */ +function encodeFieldNode(b: Builder, node: FieldNode) { + return _FieldNode.createFieldNode(b, new Long(node.length, 0), new Long(node.nullCount, 0)); +} + +/** @ignore */ +function encodeBufferRegion(b: Builder, node: BufferRegion) { + return _Buffer.createBuffer(b, new Long(node.offset, 0), new 
Long(node.length, 0)); +} + +/** @ignore */ +const platformIsLittleEndian = (function() { + const buffer = new ArrayBuffer(2); + new DataView(buffer).setInt16(0, 256, true /* littleEndian */); + // Int16Array uses the platform's endianness. + return new Int16Array(buffer)[0] === 256; +})(); + +/** @ignore */ +type MessageHeaderDecoder = () => T extends MessageHeader.Schema ? Schema + : T extends MessageHeader.RecordBatch ? RecordBatch + : T extends MessageHeader.DictionaryBatch ? DictionaryBatch : never; diff --git a/js/src/ipc/node/iterable.ts b/js/src/ipc/node/iterable.ts new file mode 100644 index 0000000000000..eb5542a1c542d --- /dev/null +++ b/js/src/ipc/node/iterable.ts @@ -0,0 +1,107 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import { Readable } from 'stream'; +import { isIterable, isAsyncIterable } from '../../util/compat'; + +/** @ignore */ +type ReadableOptions = import('stream').ReadableOptions; + +/** @ignore */ +export function toNodeStream(source: Iterable | AsyncIterable, options?: ReadableOptions): Readable { + if (isAsyncIterable(source)) { return new AsyncIterableReadable(source[Symbol.asyncIterator](), options); } + if (isIterable(source)) { return new IterableReadable(source[Symbol.iterator](), options); } + /* istanbul ignore next */ + throw new Error(`toNodeStream() must be called with an Iterable or AsyncIterable`); +} + +/** @ignore */ +class IterableReadable extends Readable { + private _pulling: boolean; + private _bytesMode: boolean; + private _iterator: Iterator; + constructor(it: Iterator, options?: ReadableOptions) { + super(options); + this._iterator = it; + this._pulling = false; + this._bytesMode = !options || !options.objectMode; + } + _read(size: number) { + const it = this._iterator; + if (it && !this._pulling && (this._pulling = true)) { + this._pulling = this._pull(size, it); + } + } + _destroy(e: Error | null, cb: (e: Error | null) => void) { + let it = this._iterator, fn: any; + it && (fn = e != null && it.throw || it.return); + fn && fn.call(it, e); + cb && cb(null); + } + private _pull(size: number, it: Iterator) { + const bm = this._bytesMode; + let r: IteratorResult | null = null; + while (this.readable && !(r = it.next(bm ? size : null)).done) { + if (size != null) { + size -= (bm && ArrayBuffer.isView(r.value) ? 
r.value.byteLength : 1); + } + if (!this.push(r.value) || size <= 0) { break; } + } + if ((r && r.done || !this.readable) && (this.push(null) || true)) { + it.return && it.return(); + } + return !this.readable; + } +} + +/** @ignore */ +class AsyncIterableReadable extends Readable { + private _pulling: boolean; + private _bytesMode: boolean; + private _iterator: AsyncIterator; + constructor(it: AsyncIterator, options?: ReadableOptions) { + super(options); + this._iterator = it; + this._pulling = false; + this._bytesMode = !options || !options.objectMode; + } + _read(size: number) { + const it = this._iterator; + if (it && !this._pulling && (this._pulling = true)) { + (async () => this._pulling = await this._pull(size, it))(); + } + } + _destroy(e: Error | null, cb: (e: Error | null) => void) { + let it = this._iterator, fn: any; + it && (fn = e != null && it.throw || it.return); + fn && fn.call(it, e).then(() => cb && cb(null)) || (cb && cb(null)); + } + private async _pull(size: number, it: AsyncIterator) { + const bm = this._bytesMode; + let r: IteratorResult | null = null; + while (this.readable && !(r = await it.next(bm ? size : null)).done) { + if (size != null) { + size -= (bm && ArrayBuffer.isView(r.value) ? r.value.byteLength : 1); + } + if (!this.push(r.value) || size <= 0) { break; } + } + if ((r && r.done || !this.readable) && (this.push(null) || true)) { + it.return && it.return(); + } + return !this.readable; + } +} diff --git a/js/src/ipc/node/reader.ts b/js/src/ipc/node/reader.ts new file mode 100644 index 0000000000000..ca19eecb58f40 --- /dev/null +++ b/js/src/ipc/node/reader.ts @@ -0,0 +1,86 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import { Duplex, DuplexOptions } from 'stream'; +import { DataType } from '../../type'; +import { RecordBatch } from '../../recordbatch'; +import { AsyncByteQueue } from '../../io/stream'; +import { RecordBatchReader } from '../../ipc/reader'; + +/** @ignore */ +export function recordBatchReaderThroughNodeStream(options?: DuplexOptions & { autoDestroy: boolean }) { + return new RecordBatchReaderDuplex(options); +} + +/** @ignore */ +type CB = (error?: Error | null | undefined) => void; + +/** @ignore */ +class RecordBatchReaderDuplex extends Duplex { + private _pulling: boolean = false; + private _autoDestroy: boolean = true; + private _reader: RecordBatchReader | null; + private _asyncQueue: AsyncByteQueue | null; + constructor(options?: DuplexOptions & { autoDestroy: boolean }) { + super({ allowHalfOpen: false, ...options, readableObjectMode: true, writableObjectMode: false }); + this._reader = null; + this._pulling = false; + this._asyncQueue = new AsyncByteQueue(); + this._autoDestroy = options && (typeof options.autoDestroy === 'boolean') ? 
options.autoDestroy : true; + } + _final(cb?: CB) { + const aq = this._asyncQueue; + aq && aq.close(); + cb && cb(); + } + _write(x: any, _: string, cb: CB) { + const aq = this._asyncQueue; + aq && aq.write(x); + cb && cb(); + return true; + } + _read(size: number) { + const aq = this._asyncQueue; + if (aq && !this._pulling && (this._pulling = true)) { + (async () => { + if (!this._reader) { + this._reader = await this._open(aq); + } + this._pulling = await this._pull(size, this._reader); + })(); + } + } + _destroy(err: Error | null, cb: (error: Error | null) => void) { + const aq = this._asyncQueue; + if (aq) { err ? aq.abort(err) : aq.close(); } + cb(this._asyncQueue = this._reader = null); + } + async _open(source: AsyncByteQueue) { + return await (await RecordBatchReader.from(source)).open({ autoDestroy: this._autoDestroy }); + } + async _pull(size: number, reader: RecordBatchReader) { + let r: IteratorResult> | null = null; + while (this.readable && !(r = await reader.next()).done) { + if (!this.push(r.value) || (size != null && --size <= 0)) { break; } + } + if ((r && r.done || !this.readable)) { + this.push(null); + await reader.cancel(); + } + return !this.readable; + } +} diff --git a/js/src/ipc/node/writer.ts b/js/src/ipc/node/writer.ts new file mode 100644 index 0000000000000..a1b31efa990c3 --- /dev/null +++ b/js/src/ipc/node/writer.ts @@ -0,0 +1,77 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
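A usage sketch for the reader Duplex above (the file name is hypothetical, and in the Node build this function presumably backs the `RecordBatchReader.throughNode` stub seen later in `ipc/reader.ts`): raw Arrow IPC bytes are written in, and `RecordBatch` objects come out in object mode.

```ts
import * as fs from 'fs';
import { RecordBatch } from '../../recordbatch';
import { recordBatchReaderThroughNodeStream } from './reader';

// Bytes flow into the writable side; once the reader opens and parses the
// schema, the readable side emits RecordBatch instances.
fs.createReadStream('table.arrows')  // hypothetical input file
    .pipe(recordBatchReaderThroughNodeStream())
    .on('data', (batch: RecordBatch) => console.log(`${batch.length} rows`))
    .on('end', () => console.log('done'));
```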
+ +import { Duplex, DuplexOptions } from 'stream'; +import { DataType } from '../../type'; +import { AsyncByteStream } from '../../io/stream'; +import { RecordBatchWriter } from '../../ipc/writer'; + +/** @ignore */ +export function recordBatchWriterThroughNodeStream(this: typeof RecordBatchWriter, options?: DuplexOptions & { autoDestroy: boolean }) { + return new RecordBatchWriterDuplex(new this(options)); +} + +/** @ignore */ +type CB = (error?: Error | null | undefined) => void; + +/** @ignore */ +class RecordBatchWriterDuplex extends Duplex { + private _pulling: boolean = false; + private _reader: AsyncByteStream | null; + private _writer: RecordBatchWriter | null; + constructor(writer: RecordBatchWriter, options?: DuplexOptions) { + super({ allowHalfOpen: false, ...options, writableObjectMode: true, readableObjectMode: false }); + this._writer = writer; + this._reader = new AsyncByteStream(writer); + } + _final(cb?: CB) { + const writer = this._writer; + writer && writer.close(); + cb && cb(); + } + _write(x: any, _: string, cb: CB) { + const writer = this._writer; + writer && writer.write(x); + cb && cb(); + return true; + } + _read(size: number) { + const it = this._reader; + if (it && !this._pulling && (this._pulling = true)) { + (async () => this._pulling = await this._pull(size, it))(); + } + } + _destroy(err: Error | null, cb: (error: Error | null) => void) { + const writer = this._writer; + if (writer) { err ? writer.abort(err) : writer.close(); } + cb(this._reader = this._writer = null); + } + async _pull(size: number, reader: AsyncByteStream) { + let r: IteratorResult | null = null; + while (this.readable && !(r = await reader.next(size || null)).done) { + if (size != null && r.value) { + size -= r.value.byteLength; + } + if (!this.push(r.value) || size <= 0) { break; } + } + if ((r && r.done || !this.readable)) { + this.push(null); + await reader.cancel(); + } + return !this.readable; + } +} diff --git a/js/src/ipc/reader.ts b/js/src/ipc/reader.ts new file mode 100644 index 0000000000000..91990afb35b17 --- /dev/null +++ b/js/src/ipc/reader.ts @@ -0,0 +1,737 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
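And the mirror image for the writer side. `recordBatchWriterThroughNodeStream` declares a `this: typeof RecordBatchWriter` parameter, so the sketch below binds `this` explicitly; in the Node build it is presumably attached as a static `throughNode` on each writer class. File names are illustrative, and the base `RecordBatchWriter` is assumed to be directly constructible:

```ts
import * as fs from 'fs';
import { RecordBatchWriter } from '../../ipc/writer';
import { recordBatchReaderThroughNodeStream } from './reader';
import { recordBatchWriterThroughNodeStream } from './writer';

// Round-trip an IPC stream: bytes -> RecordBatch objects -> bytes again.
fs.createReadStream('in.arrows')
    .pipe(recordBatchReaderThroughNodeStream())                       // emits RecordBatch objects
    .pipe(recordBatchWriterThroughNodeStream.call(RecordBatchWriter)) // re-serializes them
    .pipe(fs.createWriteStream('out.arrows'));
```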
+ +import { DataType } from '../type'; +import { Vector } from '../vector'; +import { MessageHeader } from '../enum'; +import { Footer } from './metadata/file'; +import { Schema, Field } from '../schema'; +import streamAdapters from '../io/adapters'; +import { Message } from './metadata/message'; +import { RecordBatch } from '../recordbatch'; +import * as metadata from './metadata/message'; +import { ArrayBufferViewInput } from '../util/buffer'; +import { ByteStream, AsyncByteStream } from '../io/stream'; +import { RandomAccessFile, AsyncRandomAccessFile } from '../io/file'; +import { VectorLoader, JSONVectorLoader } from '../visitor/vectorloader'; +import { + FileHandle, + ArrowJSONLike, + ITERATOR_DONE, + ReadableInterop, +} from '../io/interfaces'; +import { + MessageReader, AsyncMessageReader, JSONMessageReader, + checkForMagicArrowString, magicLength, magicAndPadding, magicX2AndPadding +} from './message'; +import { + isPromise, + isIterable, isAsyncIterable, + isIteratorResult, isArrowJSON, + isFileHandle, isFetchResponse, + isReadableDOMStream, isReadableNodeStream +} from '../util/compat'; + +/** @ignore */ export type FromArg0 = ArrowJSONLike; +/** @ignore */ export type FromArg1 = PromiseLike; +/** @ignore */ export type FromArg2 = Iterable | ArrayBufferViewInput; +/** @ignore */ export type FromArg3 = PromiseLike | ArrayBufferViewInput>; +/** @ignore */ export type FromArg4 = Response | NodeJS.ReadableStream | ReadableStream | AsyncIterable; +/** @ignore */ export type FromArg5 = FileHandle | PromiseLike | PromiseLike; +/** @ignore */ export type FromArgs = FromArg0 | FromArg1 | FromArg2 | FromArg3 | FromArg4 | FromArg5; + +/** @ignore */ type OpenOptions = { autoDestroy?: boolean; }; +/** @ignore */ type RecordBatchReaders = RecordBatchFileReader | RecordBatchStreamReader; +/** @ignore */ type AsyncRecordBatchReaders = AsyncRecordBatchFileReader | AsyncRecordBatchStreamReader; +/** @ignore */ type RecordBatchFileReaders = RecordBatchFileReader | AsyncRecordBatchFileReader; +/** @ignore */ type RecordBatchStreamReaders = RecordBatchStreamReader | AsyncRecordBatchStreamReader; + +export class RecordBatchReader extends ReadableInterop> { + + protected _impl: RecordBatchReaderImpls; + protected constructor(impl: RecordBatchReaderImpls) { + super(); + this._impl = impl; + } + + public get closed() { return this._impl.closed; } + public get schema() { return this._impl.schema; } + public get autoDestroy() { return this._impl.autoDestroy; } + public get dictionaries() { return this._impl.dictionaries; } + public get numDictionaries() { return this._impl.numDictionaries; } + public get numRecordBatches() { return this._impl.numRecordBatches; } + public get footer() { return this._impl.isFile() ? 
this._impl.footer : null; } + + public isSync(): this is RecordBatchReaders { return this._impl.isSync(); } + public isAsync(): this is AsyncRecordBatchReaders { return this._impl.isAsync(); } + public isFile(): this is RecordBatchFileReaders { return this._impl.isFile(); } + public isStream(): this is RecordBatchStreamReaders { return this._impl.isStream(); } + + public next() { + return this._impl.next(); + } + public throw(value?: any) { + return this._impl.throw(value); + } + public return(value?: any) { + return this._impl.return(value); + } + public cancel() { + return this._impl.cancel(); + } + public reset(schema?: Schema | null): this { + this._impl.reset(schema); + return this; + } + public open(options?: OpenOptions) { + const opening = this._impl.open(options); + return isPromise(opening) ? opening.then(() => this) : this; + } + public readRecordBatch(index: number): RecordBatch | null | Promise | null> { + return this._impl.isFile() ? this._impl.readRecordBatch(index) : null; + } + public [Symbol.iterator](): IterableIterator> { + return (>> this._impl)[Symbol.iterator](); + } + public [Symbol.asyncIterator](): AsyncIterableIterator> { + return (>> this._impl)[Symbol.asyncIterator](); + } + public toDOMStream() { + return streamAdapters.toDOMStream>( + (this.isSync() + ? { [Symbol.iterator]: () => this } as Iterable> + : { [Symbol.asyncIterator]: () => this } as AsyncIterable>)); + } + public toNodeStream() { + return streamAdapters.toNodeStream>( + (this.isSync() + ? { [Symbol.iterator]: () => this } as Iterable> + : { [Symbol.asyncIterator]: () => this } as AsyncIterable>), + { objectMode: true }); + } + + /** @nocollapse */ + // @ts-ignore + public static throughNode(options?: import('stream').DuplexOptions & { autoDestroy: boolean }): import('stream').Duplex { + throw new Error(`"throughNode" not available in this environment`); + } + /** @nocollapse */ + public static throughDOM( + // @ts-ignore + writableStrategy?: ByteLengthQueuingStrategy, + // @ts-ignore + readableStrategy?: { autoDestroy: boolean } + ): { writable: WritableStream, readable: ReadableStream> } { + throw new Error(`"throughDOM" not available in this environment`); + } + + public static from(source: T): T; + public static from(source: FromArg0): RecordBatchStreamReader; + public static from(source: FromArg1): Promise>; + public static from(source: FromArg2): RecordBatchFileReader | RecordBatchStreamReader; + public static from(source: FromArg3): Promise | RecordBatchStreamReader>; + public static from(source: FromArg4): Promise | AsyncRecordBatchReaders>; + public static from(source: FromArg5): Promise | AsyncRecordBatchStreamReader>; + /** @nocollapse */ + public static from(source: any) { + if (source instanceof RecordBatchReader) { + return source; + } else if (isArrowJSON(source)) { + return fromArrowJSON(source); + } else if (isFileHandle(source)) { + return fromFileHandle(source); + } else if (isPromise(source)) { + return (async () => await RecordBatchReader.from(await source))(); + } else if (isFetchResponse(source) || isReadableDOMStream(source) || isReadableNodeStream(source) || isAsyncIterable(source)) { + return fromAsyncByteStream(new AsyncByteStream(source)); + } + return fromByteStream(new ByteStream(source)); + } + + public static readAll(source: T): T extends RecordBatchReaders ? 
IterableIterator : AsyncIterableIterator; + public static readAll(source: FromArg0): IterableIterator>; + public static readAll(source: FromArg1): AsyncIterableIterator>; + public static readAll(source: FromArg2): IterableIterator | RecordBatchStreamReader>; + public static readAll(source: FromArg3): AsyncIterableIterator | RecordBatchStreamReader>; + public static readAll(source: FromArg4): AsyncIterableIterator | AsyncRecordBatchReaders>; + public static readAll(source: FromArg5): AsyncIterableIterator | AsyncRecordBatchStreamReader>; + /** @nocollapse */ + public static readAll(source: any) { + if (source instanceof RecordBatchReader) { + return source.isSync() ? readAllSync(source) : readAllAsync(source as AsyncRecordBatchReaders); + } else if (isArrowJSON(source) || ArrayBuffer.isView(source) || isIterable(source) || isIteratorResult(source)) { + return readAllSync(source) as IterableIterator>; + } + return readAllAsync(source) as AsyncIterableIterator | AsyncRecordBatchReaders>; + } +} + +// +// Since TS is a structural type system, we define the following subclass stubs +// so that concrete types exist to associate with the interfaces below. +// +// The implementation for each RecordBatchReader is hidden away in the set of +// `RecordBatchReaderImpl` classes in the second half of this file. This allows +// us to export a single RecordBatchReader class, and swap out the impl based +// on the I/O primitives or the underlying Arrow format (JSON, file, or stream) at runtime. +// +// Async/await makes our job a bit harder, since it forces everything to be +// either fully sync or fully async. This is why the logic for the reader impls +// has been duplicated into both sync and async variants. Since the RBR +// delegates to its impl, an RBR with an AsyncRecordBatchFileReaderImpl for +// example will return async/await-friendly Promises, but one with a (sync) +// RecordBatchStreamReaderImpl will always return values. Nothing should be +// different about their logic, aside from the async handling. This is also why +// this code looks highly structured, as it should be nearly identical and easy +// to follow.
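To make the comment above concrete, here is a hedged sketch of how the sync/async split surfaces at the call site (file names illustrative): a `Uint8Array` source produces a sync reader whose `open()` and iteration return plain values, while a Node stream produces a `Promise` of an async reader whose methods return `Promise`s.

```ts
import * as fs from 'fs';
import { RecordBatchReader } from './reader';

// Sync source: open() returns the reader itself, so plain for..of works.
const syncReader = RecordBatchReader.from(fs.readFileSync('table.arrow')).open();
for (const batch of syncReader) {
    console.log(batch.length);
}

(async () => {
    // Async source: from() and open() return Promises, so use for await..of.
    const asyncReader = await RecordBatchReader.from(fs.createReadStream('table.arrow'));
    await asyncReader.open();
    for await (const batch of asyncReader) {
        console.log(batch.length);
    }
})();
```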
+// + +/** @ignore */ +export class RecordBatchStreamReader extends RecordBatchReader { + constructor(protected _impl: RecordBatchStreamReaderImpl) { super (_impl); } + public [Symbol.iterator]() { return (this._impl as IterableIterator>)[Symbol.iterator](); } + public async *[Symbol.asyncIterator](): AsyncIterableIterator> { yield* this[Symbol.iterator](); } +} +/** @ignore */ +export class AsyncRecordBatchStreamReader extends RecordBatchReader { + constructor(protected _impl: AsyncRecordBatchStreamReaderImpl) { super (_impl); } + public [Symbol.iterator](): IterableIterator> { throw new Error(`AsyncRecordBatchStreamReader is not Iterable`); } + public [Symbol.asyncIterator]() { return (this._impl as AsyncIterableIterator>)[Symbol.asyncIterator](); } +} +/** @ignore */ +export class RecordBatchFileReader extends RecordBatchStreamReader { + constructor(protected _impl: RecordBatchFileReaderImpl) { super (_impl); } +} +/** @ignore */ +export class AsyncRecordBatchFileReader extends AsyncRecordBatchStreamReader { + constructor(protected _impl: AsyncRecordBatchFileReaderImpl) { super (_impl); } +} + +// +// Now override the return types for each sync/async RecordBatchReader variant +// + +/** @ignore */ +export interface RecordBatchStreamReader extends RecordBatchReader { + open(options?: OpenOptions | undefined): this; + cancel(): void; + throw(value?: any): IteratorResult; + return(value?: any): IteratorResult; + next(value?: any): IteratorResult>; +} + +/** @ignore */ +export interface AsyncRecordBatchStreamReader extends RecordBatchReader { + open(options?: OpenOptions | undefined): Promise; + cancel(): Promise; + throw(value?: any): Promise>; + return(value?: any): Promise>; + next(value?: any): Promise>>; +} + +/** @ignore */ +export interface RecordBatchFileReader extends RecordBatchStreamReader { + footer: Footer; + readRecordBatch(index: number): RecordBatch | null; +} + +/** @ignore */ +export interface AsyncRecordBatchFileReader extends AsyncRecordBatchStreamReader { + footer: Footer; + readRecordBatch(index: number): Promise | null>; +} + +/** @ignore */ +type RecordBatchReaderImpls = + RecordBatchJSONReaderImpl | + RecordBatchFileReaderImpl | + RecordBatchStreamReaderImpl | + AsyncRecordBatchFileReaderImpl | + AsyncRecordBatchStreamReaderImpl; + +/** @ignore */ +interface RecordBatchReaderImpl { + + closed: boolean; + schema: Schema; + autoDestroy: boolean; + dictionaries: Map; + + isFile(): this is RecordBatchFileReaders; + isStream(): this is RecordBatchStreamReaders; + isSync(): this is RecordBatchReaders; + isAsync(): this is AsyncRecordBatchReaders; + + reset(schema?: Schema | null): this; +} + +/** @ignore */ +interface RecordBatchStreamReaderImpl extends RecordBatchReaderImpl { + + open(options?: OpenOptions): this; + cancel(): void; + + throw(value?: any): IteratorResult; + return(value?: any): IteratorResult; + next(value?: any): IteratorResult>; + + [Symbol.iterator](): IterableIterator>; +} + +/** @ignore */ +interface AsyncRecordBatchStreamReaderImpl extends RecordBatchReaderImpl { + + open(options?: OpenOptions): Promise; + cancel(): Promise; + + throw(value?: any): Promise>; + return(value?: any): Promise>; + next(value?: any): Promise>>; + + [Symbol.asyncIterator](): AsyncIterableIterator>; +} + +/** @ignore */ +interface RecordBatchFileReaderImpl extends RecordBatchStreamReaderImpl { + readRecordBatch(index: number): RecordBatch | null; +} + +/** @ignore */ +interface AsyncRecordBatchFileReaderImpl extends AsyncRecordBatchStreamReaderImpl { + 
readRecordBatch(index: number): Promise | null>; +} + +/** @ignore */ +abstract class RecordBatchReaderImpl implements RecordBatchReaderImpl { + + // @ts-ignore + public schema: Schema; + public closed = false; + public autoDestroy = true; + public dictionaries: Map; + + protected _dictionaryIndex = 0; + protected _recordBatchIndex = 0; + public get numDictionaries() { return this._dictionaryIndex; } + public get numRecordBatches() { return this._recordBatchIndex; } + + constructor(dictionaries = new Map()) { + this.dictionaries = dictionaries; + } + + public isSync(): this is RecordBatchReaders { return false; } + public isAsync(): this is AsyncRecordBatchReaders { return false; } + public isFile(): this is RecordBatchFileReaders { return false; } + public isStream(): this is RecordBatchStreamReaders { return false; } + + public reset(schema?: Schema | null) { + this._dictionaryIndex = 0; + this._recordBatchIndex = 0; + this.schema = schema; + this.dictionaries = new Map(); + return this; + } + + protected _loadRecordBatch(header: metadata.RecordBatch, body: any) { + return new RecordBatch(this.schema, header.length, this._loadVectors(header, body, this.schema.fields)); + } + protected _loadDictionaryBatch(header: metadata.DictionaryBatch, body: any) { + const { id, isDelta, data } = header; + const { dictionaries, schema } = this; + if (isDelta || !dictionaries.get(id)) { + + const type = schema.dictionaries.get(id)!; + const vector = (isDelta ? dictionaries.get(id)!.concat( + Vector.new(this._loadVectors(data, body, [type])[0])) : + Vector.new(this._loadVectors(data, body, [type])[0])) as Vector; + + (schema.dictionaryFields.get(id) || []).forEach(({ type }) => type.dictionaryVector = vector); + + return vector; + } + return dictionaries.get(id)!; + } + protected _loadVectors(header: metadata.RecordBatch, body: any, types: (Field | DataType)[]) { + return new VectorLoader(body, header.nodes, header.buffers).visitMany(types); + } +} + +/** @ignore */ +class RecordBatchStreamReaderImpl extends RecordBatchReaderImpl implements IterableIterator> { + + protected _reader: MessageReader; + protected _handle: ByteStream | ArrowJSONLike; + + constructor(source: ByteStream | ArrowJSONLike, dictionaries?: Map) { + super(dictionaries); + this._reader = !isArrowJSON(source) + ? 
new MessageReader(this._handle = source) + : new JSONMessageReader(this._handle = source); + } + + public isSync(): this is RecordBatchReaders { return true; } + public isStream(): this is RecordBatchStreamReaders { return true; } + public [Symbol.iterator](): IterableIterator> { + return this as IterableIterator>; + } + public cancel() { + if (!this.closed && (this.closed = true)) { + this.reset()._reader.return(); + this._reader = null; + this.dictionaries = null; + } + } + public open(options?: OpenOptions) { + if (!this.closed) { + this.autoDestroy = shouldAutoDestroy(this, options); + if (!(this.schema || (this.schema = this._reader.readSchema()!))) { + this.cancel(); + } + } + return this; + } + public throw(value?: any): IteratorResult { + if (!this.closed && this.autoDestroy && (this.closed = true)) { + return this.reset()._reader.throw(value); + } + return ITERATOR_DONE; + } + public return(value?: any): IteratorResult { + if (!this.closed && this.autoDestroy && (this.closed = true)) { + return this.reset()._reader.return(value); + } + return ITERATOR_DONE; + } + public next(): IteratorResult> { + if (this.closed) { return ITERATOR_DONE; } + let message: Message | null, { _reader: reader } = this; + while (message = this._readNextMessageAndValidate()) { + if (message.isSchema()) { + this.reset(message.header()); + } else if (message.isRecordBatch()) { + this._recordBatchIndex++; + const header = message.header(); + const buffer = reader.readMessageBody(message.bodyLength); + const recordBatch = this._loadRecordBatch(header, buffer); + return { done: false, value: recordBatch }; + } else if (message.isDictionaryBatch()) { + this._dictionaryIndex++; + const header = message.header(); + const buffer = reader.readMessageBody(message.bodyLength); + const vector = this._loadDictionaryBatch(header, buffer); + this.dictionaries.set(header.id, vector); + } + } + return this.return(); + } + protected _readNextMessageAndValidate(type?: T | null) { + return this._reader.readMessage(type); + } +} + +/** @ignore */ +class AsyncRecordBatchStreamReaderImpl extends RecordBatchReaderImpl implements AsyncIterableIterator> { + + protected _handle: AsyncByteStream; + protected _reader: AsyncMessageReader; + + constructor(source: AsyncByteStream, dictionaries?: Map) { + super(dictionaries); + this._reader = new AsyncMessageReader(this._handle = source); + } + public isAsync(): this is AsyncRecordBatchReaders { return true; } + public isStream(): this is RecordBatchStreamReaders { return true; } + public [Symbol.asyncIterator](): AsyncIterableIterator> { + return this as AsyncIterableIterator>; + } + public async cancel() { + if (!this.closed && (this.closed = true)) { + await this.reset()._reader.return(); + this._reader = null; + this.dictionaries = null; + } + } + public async open(options?: OpenOptions) { + if (!this.closed) { + this.autoDestroy = shouldAutoDestroy(this, options); + if (!(this.schema || (this.schema = (await this._reader.readSchema())!))) { + await this.cancel(); + } + } + return this; + } + public async throw(value?: any): Promise> { + if (!this.closed && this.autoDestroy && (this.closed = true)) { + return await this.reset()._reader.throw(value); + } + return ITERATOR_DONE; + } + public async return(value?: any): Promise> { + if (!this.closed && this.autoDestroy && (this.closed = true)) { + return await this.reset()._reader.return(value); + } + return ITERATOR_DONE; + } + public async next() { + if (this.closed) { return ITERATOR_DONE; } + let message: Message | null, { _reader: 
reader } = this; + while (message = await this._readNextMessageAndValidate()) { + if (message.isSchema()) { + await this.reset(message.header()); + } else if (message.isRecordBatch()) { + this._recordBatchIndex++; + const header = message.header(); + const buffer = await reader.readMessageBody(message.bodyLength); + const recordBatch = this._loadRecordBatch(header, buffer); + return { done: false, value: recordBatch }; + } else if (message.isDictionaryBatch()) { + this._dictionaryIndex++; + const header = message.header(); + const buffer = await reader.readMessageBody(message.bodyLength); + const vector = this._loadDictionaryBatch(header, buffer); + this.dictionaries.set(header.id, vector); + } + } + return await this.return(); + } + protected async _readNextMessageAndValidate(type?: T | null) { + return await this._reader.readMessage(type); + } +} + +/** @ignore */ +class RecordBatchFileReaderImpl extends RecordBatchStreamReaderImpl { + + // @ts-ignore + protected _footer?: Footer; + // @ts-ignore + protected _handle: RandomAccessFile; + public get footer() { return this._footer!; } + public get numDictionaries() { return this._footer ? this._footer.numDictionaries : 0; } + public get numRecordBatches() { return this._footer ? this._footer.numRecordBatches : 0; } + + constructor(source: RandomAccessFile | ArrayBufferViewInput, dictionaries?: Map) { + super(source instanceof RandomAccessFile ? source : new RandomAccessFile(source), dictionaries); + } + public isSync(): this is RecordBatchReaders { return true; } + public isFile(): this is RecordBatchFileReaders { return true; } + public open(options?: OpenOptions) { + if (!this.closed && !this._footer) { + this.schema = (this._footer = this._readFooter()).schema; + for (const block of this._footer.dictionaryBatches()) { + block && this._readDictionaryBatch(this._dictionaryIndex++); + } + } + return super.open(options); + } + public readRecordBatch(index: number) { + if (this.closed) { return null; } + if (!this._footer) { this.open(); } + const block = this._footer && this._footer.getRecordBatch(index); + if (block && this._handle.seek(block.offset)) { + const message = this._reader.readMessage(MessageHeader.RecordBatch); + if (message && message.isRecordBatch()) { + const header = message.header(); + const buffer = this._reader.readMessageBody(message.bodyLength); + const recordBatch = this._loadRecordBatch(header, buffer); + return recordBatch; + } + } + return null; + } + protected _readDictionaryBatch(index: number) { + const block = this._footer && this._footer.getDictionaryBatch(index); + if (block && this._handle.seek(block.offset)) { + const message = this._reader.readMessage(MessageHeader.DictionaryBatch); + if (message && message.isDictionaryBatch()) { + const header = message.header(); + const buffer = this._reader.readMessageBody(message.bodyLength); + const vector = this._loadDictionaryBatch(header, buffer); + this.dictionaries.set(header.id, vector); + } + } + } + protected _readFooter() { + const { _handle } = this; + const offset = _handle.size - magicAndPadding; + const length = _handle.readInt32(offset); + const buffer = _handle.readAt(offset - length, length); + return Footer.decode(buffer); + } + protected _readNextMessageAndValidate(type?: T | null): Message | null { + if (!this._footer) { this.open(); } + if (this._footer && this._recordBatchIndex < this.numRecordBatches) { + const block = this._footer && this._footer.getRecordBatch(this._recordBatchIndex); + if (block && this._handle.seek(block.offset)) { + return 
this._reader.readMessage(type); + } + } + return null; + } +} + +/** @ignore */ +class AsyncRecordBatchFileReaderImpl extends AsyncRecordBatchStreamReaderImpl + implements AsyncRecordBatchFileReaderImpl { + + protected _footer?: Footer; + // @ts-ignore + protected _handle: AsyncRandomAccessFile; + public get footer() { return this._footer!; } + public get numDictionaries() { return this._footer ? this._footer.numDictionaries : 0; } + public get numRecordBatches() { return this._footer ? this._footer.numRecordBatches : 0; } + + constructor(source: FileHandle, byteLength?: number, dictionaries?: Map); + constructor(source: FileHandle | AsyncRandomAccessFile, dictionaries?: Map); + constructor(source: FileHandle | AsyncRandomAccessFile, ...rest: any[]) { + const byteLength = typeof rest[0] !== 'number' ? rest.shift() : undefined; + const dictionaries = rest[0] instanceof Map ? > rest.shift() : undefined; + super(source instanceof AsyncRandomAccessFile ? source : new AsyncRandomAccessFile(source, byteLength), dictionaries); + } + public isFile(): this is RecordBatchFileReaders { return true; } + public isAsync(): this is AsyncRecordBatchReaders { return true; } + public async open(options?: OpenOptions) { + if (!this.closed && !this._footer) { + this.schema = (this._footer = await this._readFooter()).schema; + for (const block of this._footer.dictionaryBatches()) { + block && await this._readDictionaryBatch(this._dictionaryIndex++); + } + } + return await super.open(options); + } + public async readRecordBatch(index: number) { + if (this.closed) { return null; } + if (!this._footer) { await this.open(); } + const block = this._footer && this._footer.getRecordBatch(index); + if (block && (await this._handle.seek(block.offset))) { + const message = await this._reader.readMessage(MessageHeader.RecordBatch); + if (message && message.isRecordBatch()) { + const header = message.header(); + const buffer = await this._reader.readMessageBody(message.bodyLength); + const recordBatch = this._loadRecordBatch(header, buffer); + return recordBatch; + } + } + return null; + } + protected async _readDictionaryBatch(index: number) { + const block = this._footer && this._footer.getDictionaryBatch(index); + if (block && (await this._handle.seek(block.offset))) { + const message = await this._reader.readMessage(MessageHeader.DictionaryBatch); + if (message && message.isDictionaryBatch()) { + const header = message.header(); + const buffer = await this._reader.readMessageBody(message.bodyLength); + const vector = this._loadDictionaryBatch(header, buffer); + this.dictionaries.set(header.id, vector); + } + } + } + protected async _readFooter() { + const { _handle } = this; + _handle._pending && await _handle._pending; + const offset = _handle.size - magicAndPadding; + const length = await _handle.readInt32(offset); + const buffer = await _handle.readAt(offset - length, length); + return Footer.decode(buffer); + } + protected async _readNextMessageAndValidate(type?: T | null): Promise | null> { + if (!this._footer) { await this.open(); } + if (this._footer && this._recordBatchIndex < this.numRecordBatches) { + const block = this._footer.getRecordBatch(this._recordBatchIndex); + if (block && await this._handle.seek(block.offset)) { + return await this._reader.readMessage(type); + } + } + return null; + } +} + +/** @ignore */ +class RecordBatchJSONReaderImpl extends RecordBatchStreamReaderImpl { + constructor(source: ArrowJSONLike, dictionaries?: Map) { + super(source, dictionaries); + } + protected 
_loadVectors(header: metadata.RecordBatch, body: any, types: (Field | DataType)[]) { + return new JSONVectorLoader(body, header.nodes, header.buffers).visitMany(types); + } +} + +// +// Define some helper functions and static implementations down here. There's +// a bit of branching in the static methods that can lead to the same routines +// being executed, so we've broken those out here for readability. +// + +/** @ignore */ +function shouldAutoDestroy(self: { autoDestroy: boolean }, options?: OpenOptions) { + return options && (typeof options['autoDestroy'] === 'boolean') ? options['autoDestroy'] : self['autoDestroy']; +} + +/** @ignore */ +function* readAllSync(source: RecordBatchReaders | FromArg0 | FromArg2) { + const reader = RecordBatchReader.from( source) as RecordBatchReaders; + try { + if (!reader.open({ autoDestroy: false }).closed) { + do { yield reader; } while (!(reader.reset().open()).closed); + } + } finally { reader.cancel(); } +} + +/** @ignore */ +async function* readAllAsync(source: AsyncRecordBatchReaders | FromArg1 | FromArg3 | FromArg4 | FromArg5) { + const reader = await RecordBatchReader.from( source) as RecordBatchReader; + try { + if (!(await reader.open({ autoDestroy: false })).closed) { + do { yield reader; } while (!(await reader.reset().open()).closed); + } + } finally { await reader.cancel(); } +} + +/** @ignore */ +function fromArrowJSON(source: ArrowJSONLike) { + return new RecordBatchStreamReader(new RecordBatchJSONReaderImpl(source)); +} + +/** @ignore */ +function fromByteStream(source: ByteStream) { + const bytes = source.peek((magicLength + 7) & ~7); + return bytes && bytes.byteLength >= 4 ? !checkForMagicArrowString(bytes) + ? new RecordBatchStreamReader(new RecordBatchStreamReaderImpl(source)) + : new RecordBatchFileReader(new RecordBatchFileReaderImpl(source.read())) + : new RecordBatchStreamReader(new RecordBatchStreamReaderImpl(function*(): any {}())); +} + +/** @ignore */ +async function fromAsyncByteStream(source: AsyncByteStream) { + const bytes = await source.peek((magicLength + 7) & ~7); + return bytes && bytes.byteLength >= 4 ? !checkForMagicArrowString(bytes) + ? new AsyncRecordBatchStreamReader(new AsyncRecordBatchStreamReaderImpl(source)) + : new RecordBatchFileReader(new RecordBatchFileReaderImpl(await source.read())) + : new AsyncRecordBatchStreamReader(new AsyncRecordBatchStreamReaderImpl(async function*(): any {}())); +} + +/** @ignore */ +async function fromFileHandle(source: FileHandle) { + const { size } = await source.stat(); + const file = new AsyncRandomAccessFile(source, size); + if (size >= magicX2AndPadding) { + if (checkForMagicArrowString(await file.readAt(0, (magicLength + 7) & ~7))) { + return new AsyncRecordBatchFileReader(new AsyncRecordBatchFileReaderImpl(file)); + } + } + return new AsyncRecordBatchStreamReader(new AsyncRecordBatchStreamReaderImpl(file)); +} diff --git a/js/src/ipc/reader/arrow.ts b/js/src/ipc/reader/arrow.ts deleted file mode 100644 index 1847c9c2eb628..0000000000000 --- a/js/src/ipc/reader/arrow.ts +++ /dev/null @@ -1,55 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
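All three `from*` helpers above make the same decision: peek the first 8-byte-aligned chunk and test for the Arrow file magic to pick the file reader over the stream reader. Below is a standalone sketch of that check, assuming the standard `ARROW1` magic; it mirrors, but is not, the library's `checkForMagicArrowString` helper.

// Illustrative magic-number sniff; the real helper lives in the message support module.
const MAGIC = 'ARROW1';

function hasFileMagic(buffer: Uint8Array, index = 0): boolean {
    // Compare byte-for-byte against the ASCII codes of 'ARROW1'.
    for (let i = -1, n = MAGIC.length; ++i < n;) {
        if (buffer[index + i] !== MAGIC.charCodeAt(i)) { return false; }
    }
    return true;
}

// The peek size rounds the magic length up to the next 8-byte boundary,
// i.e. the `(magicLength + 7) & ~7` expression used above: (6 + 7) & ~7 === 8.
const peekLength = (MAGIC.length + 7) & ~7;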
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -import { readJSON } from './json'; -import { fromReadableStream } from './node'; -import { RecordBatch } from '../../recordbatch'; -import { readBuffers, readBuffersAsync } from './binary'; -import { readRecordBatches, readRecordBatchesAsync, TypeDataLoader } from './vector'; -import { Schema } from '../../type'; -import { Message } from '../metadata'; - -export { readJSON, RecordBatch }; -export { readBuffers, readBuffersAsync }; -export { readRecordBatches, readRecordBatchesAsync }; - -export function* read(sources: Iterable | object | string) { - let input: any = sources; - let messages: Iterable<{ schema: Schema, message: Message, loader: TypeDataLoader }>; - if (typeof input === 'string') { - try { input = JSON.parse(input); } - catch (e) { input = sources; } - } - if (!input || typeof input !== 'object') { - messages = (typeof input === 'string') ? readBuffers([input]) : []; - } else { - messages = (typeof input[Symbol.iterator] === 'function') ? readBuffers(input) : readJSON(input); - } - yield* readRecordBatches(messages); -} - -export async function* readAsync(sources: AsyncIterable) { - for await (let recordBatch of readRecordBatchesAsync(readBuffersAsync(sources))) { - yield recordBatch; - } -} - -export async function* readStream(stream: NodeJS.ReadableStream) { - for await (const recordBatch of readAsync(fromReadableStream(stream))) { - yield recordBatch as RecordBatch; - } -} diff --git a/js/src/ipc/reader/binary.ts b/js/src/ipc/reader/binary.ts deleted file mode 100644 index 988ce606b2614..0000000000000 --- a/js/src/ipc/reader/binary.ts +++ /dev/null @@ -1,432 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
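For contrast with the new reader, this is roughly how the `read`/`readAsync` entry points deleted above were consumed (a sketch reconstructed from the removed code; the import path reflects the old module layout):

import { read, readAsync } from './ipc/reader/arrow'; // old layout, deleted in this diff

const buffers: Uint8Array[] = [/* Arrow IPC bytes */];

// Sync: `read` accepted parsed JSON, a JSON string, or an iterable of buffers.
for (const recordBatch of read(buffers)) {
    console.log(recordBatch.length);
}

// Async: `readAsync` accepted an AsyncIterable of buffers.
async function consume(source: AsyncIterable<Uint8Array>) {
    for await (const recordBatch of readAsync(source)) {
        console.log(recordBatch.length);
    }
}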
- -import { Vector } from '../../vector'; -import { flatbuffers } from 'flatbuffers'; -import { TypeDataLoader } from './vector'; -import { checkForMagicArrowString, PADDING, magicAndPadding, isValidArrowFile } from '../magic'; -import { Message, Footer, FileBlock, RecordBatchMetadata, DictionaryBatch, BufferMetadata, FieldMetadata, } from '../metadata'; -import { - Schema, Field, - DataType, Dictionary, - Null, TimeBitWidth, - Binary, Bool, Utf8, Decimal, - Date_, Time, Timestamp, Interval, - List, Struct, Union, FixedSizeBinary, FixedSizeList, Map_, -} from '../../type'; - -import { - Int8, Uint8, - Int16, Uint16, - Int32, Uint32, - Int64, Uint64, - Float16, Float64, Float32, -} from '../../type'; - -import ByteBuffer = flatbuffers.ByteBuffer; - -type MessageReader = (bb: ByteBuffer) => IterableIterator; - -export function* readBuffers(sources: Iterable | Uint8Array | Buffer | string) { - let schema: Schema | null = null; - let dictionaries = new Map(); - let readMessages: MessageReader | null = null; - if (ArrayBuffer.isView(sources) || typeof sources === 'string') { - sources = [sources as T]; - } - for (const source of sources) { - const bb = toByteBuffer(source); - if ((!schema && ({ schema, readMessages } = readSchema(bb)) || true) && schema && readMessages) { - for (const message of readMessages(bb)) { - yield { - schema, message, - loader: new BinaryDataLoader( - bb, - arrayIterator((message as any).nodes || []), - arrayIterator((message as any).buffers || []), - dictionaries - ) - }; - } - } - } -} - -export async function* readBuffersAsync(sources: AsyncIterable) { - let schema: Schema | null = null; - let dictionaries = new Map(); - let readMessages: MessageReader | null = null; - for await (const source of sources) { - const bb = toByteBuffer(source); - if ((!schema && ({ schema, readMessages } = readSchema(bb)) || true) && schema && readMessages) { - for (const message of readMessages(bb)) { - yield { - schema, message, - loader: new BinaryDataLoader( - bb, - arrayIterator((message as any).nodes || []), - arrayIterator((message as any).buffers || []), - dictionaries - ) - }; - } - } - } -} - -export class BinaryDataLoader extends TypeDataLoader { - private bytes: Uint8Array; - private messageOffset: number; - constructor(bb: ByteBuffer, nodes: Iterator, buffers: Iterator, dictionaries: Map) { - super(nodes, buffers, dictionaries); - this.bytes = bb.bytes(); - this.messageOffset = bb.position(); - } - protected readOffsets(type: T, buffer?: BufferMetadata) { return this.readData(type, buffer); } - protected readTypeIds(type: T, buffer?: BufferMetadata) { return this.readData(type, buffer); } - protected readData(_type: T, { length, offset }: BufferMetadata = this.getBufferMetadata()) { - return new Uint8Array(this.bytes.buffer, this.bytes.byteOffset + this.messageOffset + offset, length); - } -} - -function* arrayIterator(arr: Array) { yield* arr; } - -function toByteBuffer(bytes?: Uint8Array | Buffer | string) { - let arr: Uint8Array = bytes as any || new Uint8Array(0); - if (typeof bytes === 'string') { - arr = new Uint8Array(bytes.length); - for (let i = -1, n = bytes.length; ++i < n;) { - arr[i] = bytes.charCodeAt(i); - } - return new ByteBuffer(arr); - } - return new ByteBuffer(arr); -} - -function readSchema(bb: ByteBuffer) { - let schema: Schema, readMessages, footer: Footer | null; - if (footer = readFileSchema(bb)) { - schema = footer.schema; - readMessages = readFileMessages(footer); - } else if (schema = readStreamSchema(bb)!) 
{ - readMessages = readStreamMessages; - } else { - throw new Error('Invalid Arrow buffer'); - } - return { schema, readMessages }; -} - -function readStreamSchema(bb: ByteBuffer) { - if (!checkForMagicArrowString(bb.bytes(), 0)) { - for (const message of readMessages(bb)) { - if (Message.isSchema(message)) { - return message as Schema; - } - } - } - return null; -} - -function* readStreamMessages(bb: ByteBuffer) { - for (const message of readMessages(bb)) { - if (Message.isRecordBatch(message)) { - yield message; - } else if (Message.isDictionaryBatch(message)) { - yield message; - } else { - yield message; - } - // position the buffer after the body to read the next message - bb.setPosition(bb.position() + message.bodyLength); - } -} - -function readFileSchema(bb: ByteBuffer) { - if (!isValidArrowFile(bb)) { - return null; - } - let fileLength = bb.capacity(); - let lengthOffset = fileLength - magicAndPadding; - let footerLength = bb.readInt32(lengthOffset); - bb.setPosition(lengthOffset - footerLength); - return footerFromByteBuffer(bb); -} - -function readFileMessages(footer: Footer) { - return function* (bb: ByteBuffer) { - let message: RecordBatchMetadata | DictionaryBatch; - for (let i = -1, batches = footer.dictionaryBatches, n = batches.length; ++i < n;) { - bb.setPosition(batches[i].offset); - if (message = readMessage(bb, bb.readInt32(bb.position())) as DictionaryBatch) { - yield message; - } - } - for (let i = -1, batches = footer.recordBatches, n = batches.length; ++i < n;) { - bb.setPosition(batches[i].offset); - if (message = readMessage(bb, bb.readInt32(bb.position())) as RecordBatchMetadata) { - yield message; - } - } - }; -} - -function* readMessages(bb: ByteBuffer) { - let length: number, message: Schema | RecordBatchMetadata | DictionaryBatch; - while (bb.position() < bb.capacity() && - (length = bb.readInt32(bb.position())) > 0) { - if (message = readMessage(bb, length)!) 
{ - yield message; - } - } -} - -function readMessage(bb: ByteBuffer, length: number) { - bb.setPosition(bb.position() + PADDING); - const message = messageFromByteBuffer(bb); - bb.setPosition(bb.position() + length); - return message; -} - -import * as File_ from '../../fb/File'; -import * as Schema_ from '../../fb/Schema'; -import * as Message_ from '../../fb/Message'; - -import Type = Schema_.org.apache.arrow.flatbuf.Type; -import Precision = Schema_.org.apache.arrow.flatbuf.Precision; -import MessageHeader = Message_.org.apache.arrow.flatbuf.MessageHeader; -import MetadataVersion = Schema_.org.apache.arrow.flatbuf.MetadataVersion; -import _Footer = File_.org.apache.arrow.flatbuf.Footer; -import _Block = File_.org.apache.arrow.flatbuf.Block; -import _Message = Message_.org.apache.arrow.flatbuf.Message; -import _Schema = Schema_.org.apache.arrow.flatbuf.Schema; -import _Field = Schema_.org.apache.arrow.flatbuf.Field; -import _RecordBatch = Message_.org.apache.arrow.flatbuf.RecordBatch; -import _DictionaryBatch = Message_.org.apache.arrow.flatbuf.DictionaryBatch; -import _FieldNode = Message_.org.apache.arrow.flatbuf.FieldNode; -import _Buffer = Schema_.org.apache.arrow.flatbuf.Buffer; -import _DictionaryEncoding = Schema_.org.apache.arrow.flatbuf.DictionaryEncoding; -import _Null = Schema_.org.apache.arrow.flatbuf.Null; -import _Int = Schema_.org.apache.arrow.flatbuf.Int; -import _FloatingPoint = Schema_.org.apache.arrow.flatbuf.FloatingPoint; -import _Binary = Schema_.org.apache.arrow.flatbuf.Binary; -import _Bool = Schema_.org.apache.arrow.flatbuf.Bool; -import _Utf8 = Schema_.org.apache.arrow.flatbuf.Utf8; -import _Decimal = Schema_.org.apache.arrow.flatbuf.Decimal; -import _Date = Schema_.org.apache.arrow.flatbuf.Date; -import _Time = Schema_.org.apache.arrow.flatbuf.Time; -import _Timestamp = Schema_.org.apache.arrow.flatbuf.Timestamp; -import _Interval = Schema_.org.apache.arrow.flatbuf.Interval; -import _List = Schema_.org.apache.arrow.flatbuf.List; -import _Struct = Schema_.org.apache.arrow.flatbuf.Struct_; -import _Union = Schema_.org.apache.arrow.flatbuf.Union; -import _FixedSizeBinary = Schema_.org.apache.arrow.flatbuf.FixedSizeBinary; -import _FixedSizeList = Schema_.org.apache.arrow.flatbuf.FixedSizeList; -import _Map = Schema_.org.apache.arrow.flatbuf.Map; - -function footerFromByteBuffer(bb: ByteBuffer) { - const dictionaryFields = new Map>(); - const f = _Footer.getRootAsFooter(bb), s = f.schema()!; - return new Footer( - dictionaryBatchesFromFooter(f), recordBatchesFromFooter(f), - new Schema(fieldsFromSchema(s, dictionaryFields), customMetadata(s), f.version(), dictionaryFields) - ); -} - -function messageFromByteBuffer(bb: ByteBuffer) { - const m = _Message.getRootAsMessage(bb)!, type = m.headerType(), version = m.version(); - switch (type) { - case MessageHeader.Schema: return schemaFromMessage(version, m.header(new _Schema())!, new Map()); - case MessageHeader.RecordBatch: return recordBatchFromMessage(version, m, m.header(new _RecordBatch())!); - case MessageHeader.DictionaryBatch: return dictionaryBatchFromMessage(version, m, m.header(new _DictionaryBatch())!); - } - return null; - // throw new Error(`Unrecognized Message type '${type}'`); -} - -function schemaFromMessage(version: MetadataVersion, s: _Schema, dictionaryFields: Map>) { - return new Schema(fieldsFromSchema(s, dictionaryFields), customMetadata(s), version, dictionaryFields); -} - -function recordBatchFromMessage(version: MetadataVersion, m: _Message, b: _RecordBatch) { - return new 
RecordBatchMetadata(version, b.length(), fieldNodesFromRecordBatch(b), buffersFromRecordBatch(b, version), m.bodyLength()); -} - -function dictionaryBatchFromMessage(version: MetadataVersion, m: _Message, d: _DictionaryBatch) { - return new DictionaryBatch(version, recordBatchFromMessage(version, m, d.data()!), d.id(), d.isDelta()); -} - -function dictionaryBatchesFromFooter(f: _Footer) { - const blocks = [] as FileBlock[]; - for (let b: _Block, i = -1, n = f && f.dictionariesLength(); ++i < n;) { - if (b = f.dictionaries(i)!) { - blocks.push(new FileBlock(b.metaDataLength(), b.bodyLength(), b.offset())); - } - } - return blocks; -} - -function recordBatchesFromFooter(f: _Footer) { - const blocks = [] as FileBlock[]; - for (let b: _Block, i = -1, n = f && f.recordBatchesLength(); ++i < n;) { - if (b = f.recordBatches(i)!) { - blocks.push(new FileBlock(b.metaDataLength(), b.bodyLength(), b.offset())); - } - } - return blocks; -} - -function fieldsFromSchema(s: _Schema, dictionaryFields: Map> | null) { - const fields = [] as Field[]; - for (let i = -1, c: Field | null, n = s && s.fieldsLength(); ++i < n;) { - if (c = field(s.fields(i)!, dictionaryFields)) { - fields.push(c); - } - } - return fields; -} - -function fieldsFromField(f: _Field, dictionaryFields: Map> | null) { - const fields = [] as Field[]; - for (let i = -1, c: Field | null, n = f && f.childrenLength(); ++i < n;) { - if (c = field(f.children(i)!, dictionaryFields)) { - fields.push(c); - } - } - return fields; -} - -function fieldNodesFromRecordBatch(b: _RecordBatch) { - const fieldNodes = [] as FieldMetadata[]; - for (let i = -1, n = b.nodesLength(); ++i < n;) { - fieldNodes.push(fieldNodeFromRecordBatch(b.nodes(i)!)); - } - return fieldNodes; -} - -function buffersFromRecordBatch(b: _RecordBatch, version: MetadataVersion) { - const buffers = [] as BufferMetadata[]; - for (let i = -1, n = b.buffersLength(); ++i < n;) { - let buffer = b.buffers(i)!; - // If this Arrow buffer was written before version 4, - // advance the buffer's bb_pos 8 bytes to skip past - // the now-removed page id field. - if (version < MetadataVersion.V4) { - buffer.bb_pos += (8 * (i + 1)); - } - buffers.push(bufferFromRecordBatch(buffer)); - } - return buffers; -} - -function field(f: _Field, dictionaryFields: Map> | null) { - let name = f.name()!; - let field: Field | void; - let nullable = f.nullable(); - let metadata = customMetadata(f); - let dataType: DataType | null; - let keysMeta: _Int | null, id: number; - let dictMeta: _DictionaryEncoding | null; - if (!dictionaryFields || !(dictMeta = f.dictionary())) { - if (dataType = typeFromField(f, fieldsFromField(f, dictionaryFields))) { - field = new Field(name, dataType, nullable, metadata); - } - } else if (dataType = dictionaryFields.has(id = dictMeta.id().low) - ? dictionaryFields.get(id)!.type.dictionary - : typeFromField(f, fieldsFromField(f, null))) { - dataType = new Dictionary(dataType, - // a dictionary index defaults to signed 32 bit int if unspecified - (keysMeta = dictMeta.indexType()) ? intFromField(keysMeta)! 
: new Int32(), - id, dictMeta.isOrdered() - ); - field = new Field(name, dataType, nullable, metadata); - dictionaryFields.has(id) || dictionaryFields.set(id, field as Field); - } - return field || null; -} - -function customMetadata(parent?: _Schema | _Field | null) { - const data = new Map(); - if (parent) { - for (let entry, key, i = -1, n = parent.customMetadataLength() | 0; ++i < n;) { - if ((entry = parent.customMetadata(i)) && (key = entry.key()) != null) { - data.set(key, entry.value()!); - } - } - } - return data; -} - -function fieldNodeFromRecordBatch(f: _FieldNode) { - return new FieldMetadata(f.length(), f.nullCount()); -} - -function bufferFromRecordBatch(b: _Buffer) { - return new BufferMetadata(b.offset(), b.length()); -} - -function typeFromField(f: _Field, children?: Field[]): DataType | null { - switch (f.typeType()) { - case Type.NONE: return null; - case Type.Null: return nullFromField(f.type(new _Null())!); - case Type.Int: return intFromField(f.type(new _Int())!); - case Type.FloatingPoint: return floatFromField(f.type(new _FloatingPoint())!); - case Type.Binary: return binaryFromField(f.type(new _Binary())!); - case Type.Utf8: return utf8FromField(f.type(new _Utf8())!); - case Type.Bool: return boolFromField(f.type(new _Bool())!); - case Type.Decimal: return decimalFromField(f.type(new _Decimal())!); - case Type.Date: return dateFromField(f.type(new _Date())!); - case Type.Time: return timeFromField(f.type(new _Time())!); - case Type.Timestamp: return timestampFromField(f.type(new _Timestamp())!); - case Type.Interval: return intervalFromField(f.type(new _Interval())!); - case Type.List: return listFromField(f.type(new _List())!, children || []); - case Type.Struct_: return structFromField(f.type(new _Struct())!, children || []); - case Type.Union: return unionFromField(f.type(new _Union())!, children || []); - case Type.FixedSizeBinary: return fixedSizeBinaryFromField(f.type(new _FixedSizeBinary())!); - case Type.FixedSizeList: return fixedSizeListFromField(f.type(new _FixedSizeList())!, children || []); - case Type.Map: return mapFromField(f.type(new _Map())!, children || []); - } - throw new Error(`Unrecognized type ${f.typeType()}`); -} - -function nullFromField (_type: _Null) { return new Null(); } -function intFromField (_type: _Int) { switch (_type.bitWidth()) { - case 8: return _type.isSigned() ? new Int8() : new Uint8(); - case 16: return _type.isSigned() ? new Int16() : new Uint16(); - case 32: return _type.isSigned() ? new Int32() : new Uint32(); - case 64: return _type.isSigned() ? 
new Int64() : new Uint64(); - } - return null; } -function floatFromField (_type: _FloatingPoint) { switch (_type.precision()) { - case Precision.HALF: return new Float16(); - case Precision.SINGLE: return new Float32(); - case Precision.DOUBLE: return new Float64(); - } - return null; } -function binaryFromField (_type: _Binary) { return new Binary(); } -function utf8FromField (_type: _Utf8) { return new Utf8(); } -function boolFromField (_type: _Bool) { return new Bool(); } -function decimalFromField (_type: _Decimal) { return new Decimal(_type.scale(), _type.precision()); } -function dateFromField (_type: _Date) { return new Date_(_type.unit()); } -function timeFromField (_type: _Time) { return new Time(_type.unit(), _type.bitWidth() as TimeBitWidth); } -function timestampFromField (_type: _Timestamp) { return new Timestamp(_type.unit(), _type.timezone()); } -function intervalFromField (_type: _Interval) { return new Interval(_type.unit()); } -function listFromField (_type: _List, children: Field[]) { return new List(children); } -function structFromField (_type: _Struct, children: Field[]) { return new Struct(children); } -function unionFromField (_type: _Union, children: Field[]) { return new Union(_type.mode(), (_type.typeIdsArray() || []) as Type[], children); } -function fixedSizeBinaryFromField(_type: _FixedSizeBinary) { return new FixedSizeBinary(_type.byteWidth()); } -function fixedSizeListFromField (_type: _FixedSizeList, children: Field[]) { return new FixedSizeList(_type.listSize(), children); } -function mapFromField (_type: _Map, children: Field[]) { return new Map_(_type.keysSorted(), children); } diff --git a/js/src/ipc/reader/json.ts b/js/src/ipc/reader/json.ts deleted file mode 100644 index 0f0c018d66bb9..0000000000000 --- a/js/src/ipc/reader/json.ts +++ /dev/null @@ -1,304 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
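One detail above worth a worked example is the pre-V4 adjustment in `buffersFromRecordBatch`: buffer metadata written before `MetadataVersion.V4` carried a now-removed 8-byte page id per entry, so the i-th entry's flatbuffer position must be advanced by `8 * (i + 1)` bytes (its own page id plus those of every earlier entry). A sketch of the arithmetic under that assumption:

// Pre-V4 layout: [pageId:8][offset:8][length:8] per buffer; V4 dropped pageId,
// so positions computed with the V4 stride land 8 * (i + 1) bytes too early.
function preV4BufferPosition(v4Position: number, index: number): number {
    return v4Position + 8 * (index + 1); // same shift as `buffer.bb_pos += 8 * (i + 1)` above
}

// e.g. V4-stride positions 0, 16, 32 map to the actual pre-V4 data at 8, 32, 56.
const corrected = [0, 16, 32].map((pos, i) => preV4BufferPosition(pos, i));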
- -import { Vector } from '../../vector'; -import { flatbuffers } from 'flatbuffers'; -import { TypeDataLoader } from './vector'; -import { packBools } from '../../util/bit'; -import * as IntUtil from '../../util/int'; -import { TextEncoder } from 'text-encoding-utf-8'; -import { RecordBatchMetadata, DictionaryBatch, BufferMetadata, FieldMetadata } from '../metadata'; -import { - Schema, Field, - DataType, Dictionary, - Null, TimeBitWidth, - Binary, Bool, Utf8, Decimal, - Date_, Time, Timestamp, Interval, - List, Struct, Union, FixedSizeBinary, FixedSizeList, Map_, -} from '../../type'; - -import { - Int8, Uint8, - Int16, Uint16, - Int32, Uint32, - Int64, Uint64, - Float16, Float64, Float32, -} from '../../type'; - -import Long = flatbuffers.Long; - -export function* readJSON(json: any) { - const schema = schemaFromJSON(json['schema']); - const dictionaries = new Map(); - for (const batch of (json['dictionaries'] || [])) { - const message = dictionaryBatchFromJSON(batch); - yield { - schema, message, - loader: new JSONDataLoader( - flattenDataSources(batch['data']['columns']), - arrayIterator(message.nodes), - arrayIterator(message.buffers), - dictionaries - ) - }; - } - for (const batch of (json['batches'] || [])) { - const message = recordBatchFromJSON(batch); - yield { - schema, message, - loader: new JSONDataLoader( - flattenDataSources(batch['columns']), - arrayIterator(message.nodes), - arrayIterator(message.buffers), - dictionaries - ) - }; - } -} - -function* arrayIterator(arr: Array) { yield* arr; } -function flattenDataSources(xs: any[]): any[][] { - return (xs || []).reduce((buffers, column: any) => [ - ...buffers, - ...(column['VALIDITY'] && [column['VALIDITY']] || []), - ...(column['OFFSET'] && [column['OFFSET']] || []), - ...(column['TYPE'] && [column['TYPE']] || []), - ...(column['DATA'] && [column['DATA']] || []), - ...flattenDataSources(column['children']) - ], [] as any[][]); -} - -const utf8Encoder = new TextEncoder('utf-8'); - -export class JSONDataLoader extends TypeDataLoader { - constructor(private sources: any[][], nodes: Iterator, buffers: Iterator, dictionaries: Map) { - super(nodes, buffers, dictionaries); - } - protected readNullBitmap(_type: T, nullCount: number, { offset } = this.getBufferMetadata()) { - return nullCount <= 0 ? 
new Uint8Array(0) : packBools(this.sources[offset]); - } - protected readOffsets(_type: T, { offset }: BufferMetadata = this.getBufferMetadata()) { - return new Int32Array(this.sources[offset]); - } - protected readTypeIds(_type: T, { offset }: BufferMetadata = this.getBufferMetadata()) { - return new Int8Array(this.sources[offset]); - } - protected readData(type: T, { offset }: BufferMetadata = this.getBufferMetadata()) { - const { sources } = this; - if (DataType.isTimestamp(type) === true) { - return new Uint8Array(IntUtil.Int64.convertArray(sources[offset] as string[]).buffer); - } else if ((DataType.isInt(type) || DataType.isTime(type)) && type.bitWidth === 64) { - return new Uint8Array(IntUtil.Int64.convertArray(sources[offset] as string[]).buffer); - } else if (DataType.isDate(type) && type.unit === DateUnit.MILLISECOND) { - return new Uint8Array(IntUtil.Int64.convertArray(sources[offset] as string[]).buffer); - } else if (DataType.isDecimal(type) === true) { - return new Uint8Array(IntUtil.Int128.convertArray(sources[offset] as string[]).buffer); - } else if (DataType.isBinary(type) === true || DataType.isFixedSizeBinary(type) === true) { - return new Uint8Array(binaryDataFromJSON(sources[offset] as string[])); - } else if (DataType.isBool(type) === true) { - return new Uint8Array(packBools(sources[offset] as number[]).buffer); - } else if (DataType.isUtf8(type) === true) { - return utf8Encoder.encode((sources[offset] as string[]).join('')); - } else { - return toTypedArray(type.ArrayType, sources[offset].map((x) => +x)) as any; - } - } -} - -function binaryDataFromJSON(values: string[]) { - // "DATA": ["49BC7D5B6C47D2","3F5FB6D9322026"] - // There are definitely more efficient ways to do this... but it gets the - // job done. - const joined = values.join(''); - const data = new Uint8Array(joined.length / 2); - for (let i = 0; i < joined.length; i += 2) { - data[i >> 1] = parseInt(joined.substr(i, 2), 16); - } - return data.buffer; -} - -import * as Schema_ from '../../fb/Schema'; -import Type = Schema_.org.apache.arrow.flatbuf.Type; -import DateUnit = Schema_.org.apache.arrow.flatbuf.DateUnit; -import TimeUnit = Schema_.org.apache.arrow.flatbuf.TimeUnit; -import UnionMode = Schema_.org.apache.arrow.flatbuf.UnionMode; -import Precision = Schema_.org.apache.arrow.flatbuf.Precision; -import IntervalUnit = Schema_.org.apache.arrow.flatbuf.IntervalUnit; -import MetadataVersion = Schema_.org.apache.arrow.flatbuf.MetadataVersion; -import { toTypedArray } from '../../data'; - -function schemaFromJSON(s: any): Schema { - const dictionaryFields = new Map>(); - return new Schema( - fieldsFromJSON(s['fields'], dictionaryFields), - customMetadata(s['customMetadata']), - MetadataVersion.V4, dictionaryFields - ); -} - -function recordBatchFromJSON(b: any): RecordBatchMetadata { - return new RecordBatchMetadata( - MetadataVersion.V4, - b['count'], - fieldNodesFromJSON(b['columns']), - buffersFromJSON(b['columns']) - ); -} - -function dictionaryBatchFromJSON(b: any): DictionaryBatch { - return new DictionaryBatch( - MetadataVersion.V4, - recordBatchFromJSON(b['data']), - b['id'], b['isDelta'] - ); -} - -function fieldsFromJSON(fs: any[], dictionaryFields: Map> | null): Field[] { - return (fs || []) - .map((f) => fieldFromJSON(f, dictionaryFields)) - .filter((f) => f != null) as Field[]; -} - -function fieldNodesFromJSON(xs: any[]): FieldMetadata[] { - return (xs || []).reduce((fieldNodes, column: any) => [ - ...fieldNodes, - new FieldMetadata( - new Long(column['count'], 0), - new 
Long(nullCountFromJSON(column['VALIDITY']), 0) - ), - ...fieldNodesFromJSON(column['children']) - ], [] as FieldMetadata[]); -} - -function buffersFromJSON(xs: any[], buffers: BufferMetadata[] = []): BufferMetadata[] { - for (let i = -1, n = (xs || []).length; ++i < n;) { - const column = xs[i]; - column['VALIDITY'] && buffers.push(new BufferMetadata(new Long(buffers.length, 0), new Long(column['VALIDITY'].length, 0))); - column['OFFSET'] && buffers.push(new BufferMetadata(new Long(buffers.length, 0), new Long(column['OFFSET'].length, 0))); - column['TYPE'] && buffers.push(new BufferMetadata(new Long(buffers.length, 0), new Long(column['TYPE'].length, 0))); - column['DATA'] && buffers.push(new BufferMetadata(new Long(buffers.length, 0), new Long(column['DATA'].length, 0))); - buffers = buffersFromJSON(column['children'], buffers); - } - return buffers; -} - -function nullCountFromJSON(validity: number[]) { - return (validity || []).reduce((sum, val) => sum + +(val === 0), 0); -} - -function fieldFromJSON(f: any, dictionaryFields: Map> | null) { - let name = f['name']; - let field: Field | void; - let nullable = f['nullable']; - let dataType: DataType | null; - let id: number, keysMeta: any, dictMeta: any; - let metadata = customMetadata(f['customMetadata']); - if (!dictionaryFields || !(dictMeta = f['dictionary'])) { - if (dataType = typeFromJSON(f['type'], fieldsFromJSON(f['children'], dictionaryFields))) { - field = new Field(name, dataType, nullable, metadata); - } - } else if (dataType = dictionaryFields.has(id = dictMeta['id']) - ? dictionaryFields.get(id)!.type.dictionary - : typeFromJSON(f['type'], fieldsFromJSON(f['children'], null))) { - dataType = new Dictionary(dataType, - // a dictionary index defaults to signed 32 bit int if unspecified - (keysMeta = dictMeta['indexType']) ? intFromJSON(keysMeta)! 
: new Int32(), - id, dictMeta['isOrdered'] - ); - field = new Field(name, dataType, nullable, metadata); - dictionaryFields.has(id) || dictionaryFields.set(id, field as Field); - } - return field || null; -} - -function customMetadata(metadata?: any) { - return new Map(Object.entries(metadata || {})); -} - -const namesToTypeMap: { [n: string]: Type } = { - 'NONE': Type.NONE, - 'null': Type.Null, - 'int': Type.Int, - 'floatingpoint': Type.FloatingPoint, - 'binary': Type.Binary, - 'bool': Type.Bool, - 'utf8': Type.Utf8, - 'decimal': Type.Decimal, - 'date': Type.Date, - 'time': Type.Time, - 'timestamp': Type.Timestamp, - 'interval': Type.Interval, - 'list': Type.List, - 'struct': Type.Struct_, - 'union': Type.Union, - 'fixedsizebinary': Type.FixedSizeBinary, - 'fixedsizelist': Type.FixedSizeList, - 'map': Type.Map, -}; - -function typeFromJSON(t: any, children?: Field[]) { - switch (namesToTypeMap[t['name']]) { - case Type.NONE: return null; - case Type.Null: return nullFromJSON(t); - case Type.Int: return intFromJSON(t); - case Type.FloatingPoint: return floatingPointFromJSON(t); - case Type.Binary: return binaryFromJSON(t); - case Type.Utf8: return utf8FromJSON(t); - case Type.Bool: return boolFromJSON(t); - case Type.Decimal: return decimalFromJSON(t); - case Type.Date: return dateFromJSON(t); - case Type.Time: return timeFromJSON(t); - case Type.Timestamp: return timestampFromJSON(t); - case Type.Interval: return intervalFromJSON(t); - case Type.List: return listFromJSON(t, children || []); - case Type.Struct_: return structFromJSON(t, children || []); - case Type.Union: return unionFromJSON(t, children || []); - case Type.FixedSizeBinary: return fixedSizeBinaryFromJSON(t); - case Type.FixedSizeList: return fixedSizeListFromJSON(t, children || []); - case Type.Map: return mapFromJSON(t, children || []); - } - throw new Error(`Unrecognized type ${t['name']}`); -} - -function nullFromJSON (_type: any) { return new Null(); } -function intFromJSON (_type: any) { switch (_type['bitWidth']) { - case 8: return _type['isSigned'] ? new Int8() : new Uint8(); - case 16: return _type['isSigned'] ? new Int16() : new Uint16(); - case 32: return _type['isSigned'] ? new Int32() : new Uint32(); - case 64: return _type['isSigned'] ? 
new Int64() : new Uint64(); - } - return null; } -function floatingPointFromJSON (_type: any) { switch (Precision[_type['precision']] as any) { - case Precision.HALF: return new Float16(); - case Precision.SINGLE: return new Float32(); - case Precision.DOUBLE: return new Float64(); - } - return null; } -function binaryFromJSON (_type: any) { return new Binary(); } -function utf8FromJSON (_type: any) { return new Utf8(); } -function boolFromJSON (_type: any) { return new Bool(); } -function decimalFromJSON (_type: any) { return new Decimal(_type['scale'], _type['precision']); } -function dateFromJSON (_type: any) { return new Date_(DateUnit[_type['unit']] as any); } -function timeFromJSON (_type: any) { return new Time(TimeUnit[_type['unit']] as any, _type['bitWidth'] as TimeBitWidth); } -function timestampFromJSON (_type: any) { return new Timestamp(TimeUnit[_type['unit']] as any, _type['timezone']); } -function intervalFromJSON (_type: any) { return new Interval(IntervalUnit[_type['unit']] as any); } -function listFromJSON (_type: any, children: Field[]) { return new List(children); } -function structFromJSON (_type: any, children: Field[]) { return new Struct(children); } -function unionFromJSON (_type: any, children: Field[]) { return new Union(UnionMode[_type['mode']] as any, (_type['typeIds'] || []) as Type[], children); } -function fixedSizeBinaryFromJSON(_type: any) { return new FixedSizeBinary(_type['byteWidth']); } -function fixedSizeListFromJSON (_type: any, children: Field[]) { return new FixedSizeList(_type['listSize'], children); } -function mapFromJSON (_type: any, children: Field[]) { return new Map_(_type['keysSorted'], children); } diff --git a/js/src/ipc/reader/node.ts b/js/src/ipc/reader/node.ts deleted file mode 100644 index 24295c81cbd52..0000000000000 --- a/js/src/ipc/reader/node.ts +++ /dev/null @@ -1,78 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
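To make the JSON reader path above concrete, here is the minimal shape of the integration-test JSON it consumed, annotated with the helper that handles each piece (an illustrative fragment, not a complete file):

// Illustrative integration-JSON fragment (shape only).
const json = {
    schema: {
        // -> schemaFromJSON / fieldsFromJSON; the type descriptor feeds typeFromJSON,
        //    which maps { name: 'int', bitWidth: 32, isSigned: true } to `new Int32()`.
        fields: [{ name: 'id', nullable: false, type: { name: 'int', bitWidth: 32, isSigned: true }, children: [] }]
    },
    batches: [{
        count: 3, // -> RecordBatchMetadata length
        columns: [{
            name: 'id', count: 3,
            VALIDITY: [1, 1, 1], // -> packBools + nullCountFromJSON (here: 0 nulls)
            DATA: [1, 2, 3]      // -> JSONDataLoader.readData
        }]
    }]
};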
- -import { flatbuffers } from 'flatbuffers'; -import * as Message_ from '../../fb/Message'; -import ByteBuffer = flatbuffers.ByteBuffer; -import _Message = Message_.org.apache.arrow.flatbuf.Message; -import { PADDING, isValidArrowFile, checkForMagicArrowString } from '../magic'; - -export async function* fromReadableStream(stream: NodeJS.ReadableStream) { - - let bb: ByteBuffer; - let bytesRead = 0, bytes = new Uint8Array(0); - let messageLength = 0, message: _Message | null = null; - - for await (let chunk of (stream as any as AsyncIterable)) { - - if (chunk == null) { - continue; - } - - const grown = new Uint8Array(bytes.byteLength + chunk.length); - - if (typeof chunk !== 'string') { - grown.set(bytes, 0) || grown.set(chunk, bytes.byteLength); - } else { - for (let i = -1, j = bytes.byteLength, n = chunk.length; ++i < n;) { - grown[i + j] = chunk.charCodeAt(i); - } - } - - bytes = grown; - - // If we're reading in an Arrow File, just concatenate the bytes until - // the file is fully read in - if (checkForMagicArrowString(bytes)) { - if (!isValidArrowFile(new ByteBuffer(bytes))) { - continue; - } - return yield bytes; - } - - if (bytes.byteLength > 0 && messageLength <= 0) { - messageLength = new DataView(bytes.buffer).getInt32(0, true); - } - - while (messageLength > 0 && messageLength <= bytes.byteLength) { - if (!message) { - (bb = new ByteBuffer(bytes)).setPosition(4); - if (message = _Message.getRootAsMessage(bb)) { - messageLength += message.bodyLength().low; - continue; - } - throw new Error(`Invalid message at position ${bytesRead}`); - } - bytesRead += messageLength + PADDING; - yield bytes.subarray(0, messageLength + PADDING); - bytes = bytes.subarray(messageLength + PADDING); - messageLength = bytes.byteLength < 4 ? 0 : - new DataView(bytes.buffer).getInt32(bytes.byteOffset, true); - message = null; - } - } -} diff --git a/js/src/ipc/reader/vector.ts b/js/src/ipc/reader/vector.ts deleted file mode 100644 index c4688f5e2b851..0000000000000 --- a/js/src/ipc/reader/vector.ts +++ /dev/null @@ -1,131 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
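The deleted `fromReadableStream` above hand-rolled the IPC stream framing: a little-endian Int32 metadata length, the metadata flatbuffer, then a body whose length is only known after parsing that flatbuffer (hence the `messageLength += message.bodyLength().low` step). A sketch of just the framing walk, with the flatbuffer read abstracted behind a callback:

// Frame layout parsed above: [int32 metadataLength (LE)][metadata][body], repeated.
function* messageFrames(bytes: Uint8Array, bodyLengthOf: (metadata: Uint8Array) => number) {
    const PREFIX = 4; // size of the int32 length prefix (PADDING above)
    let offset = 0;
    while (offset + PREFIX <= bytes.byteLength) {
        const metadataLength = new DataView(bytes.buffer, bytes.byteOffset + offset).getInt32(0, true);
        if (metadataLength <= 0) { break; } // a zero length terminates the stream
        const metadata = bytes.subarray(offset + PREFIX, offset + PREFIX + metadataLength);
        const frameLength = PREFIX + metadataLength + bodyLengthOf(metadata);
        yield bytes.subarray(offset, offset + frameLength);
        offset += frameLength;
    }
}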
- -import { Vector } from '../../vector'; -import { RecordBatch } from '../../recordbatch'; -import { TypeVisitor } from '../../visitor'; -import { FlatType, NestedType, ListType } from '../../type'; -import { Message, FieldMetadata, BufferMetadata } from '../metadata'; -import { FlatData, ListData, NestedData, SingleNestedData, DenseUnionData, SparseUnionData, BoolData, FlatListData, DictionaryData } from '../../data'; -import { - Schema, Field, - Dictionary, - Null, Int, Float, - Binary, Bool, Utf8, Decimal, - Date_, Time, Timestamp, Interval, - List, Struct, Union, FixedSizeBinary, FixedSizeList, Map_, - UnionMode, SparseUnion, DenseUnion, FlatListType, DataType, -} from '../../type'; - -export function* readRecordBatches(messages: Iterable<{ schema: Schema, message: Message, loader: TypeDataLoader }>) { - for (const { schema, message, loader } of messages) { - yield* readRecordBatch(schema, message, loader); - } -} - -export async function* readRecordBatchesAsync(messages: AsyncIterable<{ schema: Schema, message: Message, loader: TypeDataLoader }>) { - for await (const { schema, message, loader } of messages) { - yield* readRecordBatch(schema, message, loader); - } -} - -export function* readRecordBatch(schema: Schema, message: Message, loader: TypeDataLoader) { - if (Message.isRecordBatch(message)) { - yield new RecordBatch(schema, message.length, loader.visitFields(schema.fields)); - } else if (Message.isDictionaryBatch(message)) { - const dictionaryId = message.id; - const dictionaries = loader.dictionaries; - const dictionaryField = schema.dictionaries.get(dictionaryId)!; - const dictionaryDataType = (dictionaryField.type as Dictionary).dictionary; - let dictionaryVector = Vector.create(loader.visit(dictionaryDataType)); - if (message.isDelta && dictionaries.has(dictionaryId)) { - dictionaryVector = dictionaries.get(dictionaryId)!.concat(dictionaryVector); - } - dictionaries.set(dictionaryId, dictionaryVector); - } -} - -export abstract class TypeDataLoader extends TypeVisitor { - - public dictionaries: Map; - protected nodes: Iterator; - protected buffers: Iterator; - - constructor(nodes: Iterator, buffers: Iterator, dictionaries: Map) { - super(); - this.nodes = nodes; - this.buffers = buffers; - this.dictionaries = dictionaries; - } - - public visitFields(fields: Field[]) { return fields.map((field) => this.visit(field.type)); } - - public visitNull (type: Null) { return this.visitNullType(type); } - public visitInt (type: Int) { return this.visitFlatType(type); } - public visitFloat (type: Float) { return this.visitFlatType(type); } - public visitBinary (type: Binary) { return this.visitFlatList(type); } - public visitUtf8 (type: Utf8) { return this.visitFlatList(type); } - public visitBool (type: Bool) { return this.visitBoolType(type); } - public visitDecimal (type: Decimal) { return this.visitFlatType(type); } - public visitDate (type: Date_) { return this.visitFlatType(type); } - public visitTime (type: Time) { return this.visitFlatType(type); } - public visitTimestamp (type: Timestamp) { return this.visitFlatType(type); } - public visitInterval (type: Interval) { return this.visitFlatType(type); } - public visitList (type: List) { return this.visitListType(type); } - public visitStruct (type: Struct) { return this.visitNestedType(type); } - public visitUnion (type: Union) { return this.visitUnionType(type); } - public visitFixedSizeBinary(type: FixedSizeBinary) { return this.visitFlatType(type); } - public visitFixedSizeList (type: FixedSizeList) { return 
this.visitFixedSizeListType(type); } - public visitMap (type: Map_) { return this.visitNestedType(type); } - public visitDictionary (type: Dictionary) { - return new DictionaryData(type, this.dictionaries.get(type.id)!, this.visit(type.indices)); - } - protected getFieldMetadata() { return this.nodes.next().value; } - protected getBufferMetadata() { return this.buffers.next().value; } - protected readNullBitmap(type: T, nullCount: number, buffer = this.getBufferMetadata()) { - return nullCount > 0 && this.readData(type, buffer) || new Uint8Array(0); - } - protected abstract readData(type: T, buffer?: BufferMetadata): any; - protected abstract readOffsets(type: T, buffer?: BufferMetadata): any; - protected abstract readTypeIds(type: T, buffer?: BufferMetadata): any; - protected visitNullType(type: Null, { length, nullCount }: FieldMetadata = this.getFieldMetadata()) { - return new FlatData(type, length, this.readNullBitmap(type, nullCount), new Uint8Array(0), 0, nullCount); - } - protected visitFlatType(type: T, { length, nullCount }: FieldMetadata = this.getFieldMetadata()) { - return new FlatData(type, length, this.readNullBitmap(type, nullCount), this.readData(type), 0, nullCount); - } - protected visitBoolType(type: Bool, { length, nullCount }: FieldMetadata = this.getFieldMetadata(), data?: Uint8Array) { - return new BoolData(type, length, this.readNullBitmap(type, nullCount), data || this.readData(type), 0, nullCount); - } - protected visitFlatList(type: T, { length, nullCount }: FieldMetadata = this.getFieldMetadata()) { - return new FlatListData(type, length, this.readNullBitmap(type, nullCount), this.readOffsets(type), this.readData(type), 0, nullCount); - } - protected visitListType(type: T, { length, nullCount }: FieldMetadata = this.getFieldMetadata()) { - return new ListData(type, length, this.readNullBitmap(type, nullCount), this.readOffsets(type), this.visit(type.children![0].type), 0, nullCount); - } - protected visitFixedSizeListType(type: T, { length, nullCount }: FieldMetadata = this.getFieldMetadata()) { - return new SingleNestedData(type, length, this.readNullBitmap(type, nullCount), this.visit(type.children![0].type), 0, nullCount); - } - protected visitNestedType(type: T, { length, nullCount }: FieldMetadata = this.getFieldMetadata()) { - return new NestedData(type, length, this.readNullBitmap(type, nullCount), this.visitFields(type.children), 0, nullCount); - } - protected visitUnionType(type: DenseUnion | SparseUnion, { length, nullCount }: FieldMetadata = this.getFieldMetadata()) { - return type.mode === UnionMode.Sparse ? - new SparseUnionData(type as SparseUnion, length, this.readNullBitmap(type, nullCount), this.readTypeIds(type), this.visitFields(type.children), 0, nullCount) : - new DenseUnionData(type as DenseUnion, length, this.readNullBitmap(type, nullCount), this.readTypeIds(type), this.readOffsets(type), this.visitFields(type.children), 0, nullCount); - } -} diff --git a/js/src/ipc/whatwg/iterable.ts b/js/src/ipc/whatwg/iterable.ts new file mode 100644 index 0000000000000..31916f2a3bdac --- /dev/null +++ b/js/src/ipc/whatwg/iterable.ts @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
diff --git a/js/src/ipc/whatwg/iterable.ts b/js/src/ipc/whatwg/iterable.ts
new file mode 100644
index 0000000000000..31916f2a3bdac
--- /dev/null
+++ b/js/src/ipc/whatwg/iterable.ts
@@ -0,0 +1,88 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import { toUint8Array } from '../../util/buffer';
+import { ReadableDOMStreamOptions } from '../../io/interfaces';
+import { isIterable, isAsyncIterable } from '../../util/compat';
+
+/** @ignore */
+export function toDOMStream<T>(source: Iterable<T> | AsyncIterable<T>, options?: ReadableDOMStreamOptions): ReadableStream<T> {
+    if (isAsyncIterable<T>(source)) { return asyncIterableAsReadableDOMStream(source, options); }
+    if (isIterable<T>(source)) { return iterableAsReadableDOMStream(source, options); }
+    /* istanbul ignore next */
+    throw new Error(`toDOMStream() must be called with an Iterable or AsyncIterable`);
+}
+
+/** @ignore */
+function iterableAsReadableDOMStream<T>(source: Iterable<T>, options?: ReadableDOMStreamOptions) {
+
+    let it: Iterator<T> | null = null;
+    const bm = (options && options.type === 'bytes') || false;
+    const hwm = options && options.highWaterMark || (2 ** 24);
+
+    return new ReadableStream<T>({
+        ...options as any,
+        start(controller) { next(controller, it || (it = source[Symbol.iterator]())); },
+        pull(controller) { it ? (next(controller, it)) : controller.close(); },
+        cancel() { (it && (it.return && it.return()) || true) && (it = null); }
+    }, { highWaterMark: bm ? hwm : undefined, ...options });
+
+    function next(controller: ReadableStreamDefaultController<T>, it: Iterator<T>) {
+        let buf: Uint8Array;
+        let r: IteratorResult<T> | null = null;
+        let size = controller.desiredSize || null;
+        while (!(r = it.next(bm ? size : null)).done) {
+            if (ArrayBuffer.isView(r.value) && (buf = toUint8Array(r.value))) {
+                size != null && bm && (size = size - buf.byteLength + 1);
+                r.value = buf;
+            }
+            controller.enqueue(r.value);
+            if (size != null && --size <= 0) { return; }
+        }
+        controller.close();
+    }
+}
+
+/** @ignore */
+function asyncIterableAsReadableDOMStream<T>(source: AsyncIterable<T>, options?: ReadableDOMStreamOptions) {
+
+    let it: AsyncIterator<T> | null = null;
+    const bm = (options && options.type === 'bytes') || false;
+    const hwm = options && options.highWaterMark || (2 ** 24);
+
+    return new ReadableStream<T>({
+        ...options as any,
+        async start(controller) { await next(controller, it || (it = source[Symbol.asyncIterator]())); },
+        async pull(controller) { it ? (await next(controller, it)) : controller.close(); },
+        async cancel() { (it && (it.return && await it.return()) || true) && (it = null); },
+    }, { highWaterMark: bm ? hwm : undefined, ...options });
+
+    async function next(controller: ReadableStreamDefaultController<T>, it: AsyncIterator<T>) {
+        let buf: Uint8Array;
+        let r: IteratorResult<T> | null = null;
+        let size = controller.desiredSize || null;
+        while (!(r = await it.next(bm ? size : null)).done) {
+            if (ArrayBuffer.isView(r.value) && (buf = toUint8Array(r.value))) {
+                size != null && bm && (size = size - buf.byteLength + 1);
+                r.value = buf;
+            }
+            controller.enqueue(r.value);
+            if (size != null && --size <= 0) { return; }
+        }
+        controller.close();
+    }
+}
diff --git a/js/src/ipc/whatwg/reader.ts b/js/src/ipc/whatwg/reader.ts
new file mode 100644
index 0000000000000..3e39900fe27e5
--- /dev/null
+++ b/js/src/ipc/whatwg/reader.ts
@@ -0,0 +1,52 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import { DataType } from '../../type';
+import { RecordBatch } from '../../recordbatch';
+import { AsyncByteQueue } from '../../io/stream';
+import { RecordBatchReader } from '../../ipc/reader';
+
+/** @ignore */
+export function recordBatchReaderThroughDOMStream<T extends { [key: string]: DataType } = any>(writableStrategy?: ByteLengthQueuingStrategy, readableStrategy?: { autoDestroy: boolean }) {
+
+    const queue = new AsyncByteQueue();
+    let reader: RecordBatchReader<T> | null = null;
+
+    const readable = new ReadableStream<RecordBatch<T>>({
+        async cancel() { await queue.close(); },
+        async start(controller) { await next(controller, reader || (reader = await open())); },
+        async pull(controller) { reader ? await next(controller, reader) : controller.close(); }
+    });
+
+    return { writable: new WritableStream(queue, { 'highWaterMark': 2 ** 14, ...writableStrategy }), readable };
+
+    async function open() {
+        return await (await RecordBatchReader.from<T>(queue)).open(readableStrategy);
+    }
+
+    async function next(controller: ReadableStreamDefaultController<RecordBatch<T>>, reader: RecordBatchReader<T>) {
+        let size = controller.desiredSize;
+        let r: IteratorResult<RecordBatch<T>> | null = null;
+        while (!(r = await reader.next()).done) {
+            controller.enqueue(r.value);
+            if (size != null && --size <= 0) {
+                return;
+            }
+        }
+        controller.close();
+    }
+}
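[Editorial note] The `{ writable, readable }` pair built above plugs straight into WHATWG pipe chains. A hedged usage sketch (the `RecordBatchReader.throughDOM` static that would delegate to this helper is an assumption; only the helper itself appears in this hunk):

    const { writable, readable } = RecordBatchReader.throughDOM();
    void ipcByteStream.pipeTo(writable);            // raw Arrow IPC bytes flow in
    const r = readable.getReader();
    for (let res; !(res = await r.read()).done;) {
        console.log(res.value.length);              // each chunk out is a RecordBatch
    }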
diff --git a/js/src/ipc/whatwg/writer.ts b/js/src/ipc/whatwg/writer.ts
new file mode 100644
index 0000000000000..de3b3f1d2474a
--- /dev/null
+++ b/js/src/ipc/whatwg/writer.ts
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import { DataType } from '../../type';
+import { RecordBatch } from '../../recordbatch';
+import { AsyncByteStream } from '../../io/stream';
+import { RecordBatchWriter } from '../../ipc/writer';
+
+/** @ignore */
+export function recordBatchWriterThroughDOMStream<T extends { [key: string]: DataType } = any>(
+    this: typeof RecordBatchWriter,
+    writableStrategy?: QueuingStrategy<RecordBatch<T>> & { autoDestroy: boolean },
+    readableStrategy?: { highWaterMark?: number, size?: any }
+) {
+
+    const writer = new this<T>(writableStrategy);
+    const reader = new AsyncByteStream(writer);
+    const readable = new ReadableStream({
+        type: 'bytes',
+        async cancel() { await reader.cancel(); },
+        async pull(controller) { await next(controller); },
+        async start(controller) { await next(controller); },
+    }, { 'highWaterMark': 2 ** 14, ...readableStrategy });
+
+    return { writable: new WritableStream(writer, writableStrategy), readable };
+
+    async function next(controller: ReadableStreamDefaultController<Uint8Array>) {
+        let buf: Uint8Array | null = null;
+        let size = controller.desiredSize;
+        while (buf = await reader.read(size || null)) {
+            controller.enqueue(buf);
+            if (size != null && (size -= buf.byteLength) <= 0) { return; }
+        }
+        controller.close();
+    }
+}
diff --git a/js/src/ipc/writer.ts b/js/src/ipc/writer.ts
new file mode 100644
index 0000000000000..746e5ef58e369
--- /dev/null
+++ b/js/src/ipc/writer.ts
@@ -0,0 +1,417 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
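+// [editor's note, not part of the patch] The writer below buffers serialized
+// bytes in an AsyncByteQueue sink, so the typical round trip is a hedged
+// two-liner:
+//
+//   const writer = RecordBatchStreamWriter.writeAll(table); // schema, batches, EOS
+//   const bytes  = await writer.toUint8Array();             // drain the sink
+//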
+ +import { Table } from '../table'; +import { MAGIC } from './message'; +import { Vector } from '../vector'; +import { Column } from '../column'; +import { Schema, Field } from '../schema'; +import { Chunked } from '../vector/chunked'; +import { Message } from './metadata/message'; +import { RecordBatch } from '../recordbatch'; +import * as metadata from './metadata/message'; +import { DataType, Dictionary } from '../type'; +import { FileBlock, Footer } from './metadata/file'; +import { MessageHeader, MetadataVersion } from '../enum'; +import { WritableSink, AsyncByteQueue } from '../io/stream'; +import { VectorAssembler } from '../visitor/vectorassembler'; +import { JSONTypeAssembler } from '../visitor/jsontypeassembler'; +import { JSONVectorAssembler } from '../visitor/jsonvectorassembler'; +import { ArrayBufferViewInput, toUint8Array } from '../util/buffer'; +import { Writable, ReadableInterop, ReadableDOMStreamOptions } from '../io/interfaces'; +import { isPromise, isAsyncIterable, isWritableDOMStream, isWritableNodeStream } from '../util/compat'; + +export class RecordBatchWriter extends ReadableInterop implements Writable> { + + /** @nocollapse */ + // @ts-ignore + public static throughNode(options?: import('stream').DuplexOptions & { autoDestroy: boolean }): import('stream').Duplex { + throw new Error(`"throughNode" not available in this environment`); + } + /** @nocollapse */ + public static throughDOM( + // @ts-ignore + writableStrategy?: QueuingStrategy> & { autoDestroy: boolean }, + // @ts-ignore + readableStrategy?: { highWaterMark?: number, size?: any } + ): { writable: WritableStream | RecordBatch>, readable: ReadableStream } { + throw new Error(`"throughDOM" not available in this environment`); + } + + constructor(options?: { autoDestroy: boolean }) { + super(); + this._autoDestroy = options && (typeof options.autoDestroy === 'boolean') ? 
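+        // [editor's note] autoDestroy defaults to true, so finish() closes the sink
+        // outright; pass { autoDestroy: false } to keep the writer reusable after finish()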
options.autoDestroy : true; + } + + protected _position = 0; + protected _started = false; + protected _autoDestroy: boolean; + // @ts-ignore + protected _sink = new AsyncByteQueue(); + protected _schema: Schema | null = null; + protected _dictionaryBlocks: FileBlock[] = []; + protected _recordBatchBlocks: FileBlock[] = []; + + public toString(sync: true): string; + public toString(sync?: false): Promise; + public toString(sync: any = false) { + return this._sink.toString(sync) as Promise | string; + } + public toUint8Array(sync: true): Uint8Array; + public toUint8Array(sync?: false): Promise; + public toUint8Array(sync: any = false) { + return this._sink.toUint8Array(sync) as Promise | Uint8Array; + } + + public writeAll(input: Table | Iterable>): this; + public writeAll(input: AsyncIterable>): Promise; + public writeAll(input: PromiseLike>>): Promise; + public writeAll(input: PromiseLike | Iterable>>): Promise; + public writeAll(input: PromiseLike | Table | Iterable> | AsyncIterable>) { + if (isPromise(input)) { + return input.then((x) => this.writeAll(x)); + } else if (isAsyncIterable>(input)) { + return writeAllAsync(this, input); + } + return writeAll(this, input); + } + + public get closed() { return this._sink.closed; } + public [Symbol.asyncIterator]() { return this._sink[Symbol.asyncIterator](); } + public toDOMStream(options?: ReadableDOMStreamOptions) { return this._sink.toDOMStream(options); } + public toNodeStream(options?: import('stream').ReadableOptions) { return this._sink.toNodeStream(options); } + + public close() { + return this.reset()._sink.close(); + } + public abort(reason?: any) { + return this.reset()._sink.abort(reason); + } + public finish() { + this._autoDestroy ? this.close() : this.reset(this._sink, this._schema); + return this; + } + public reset(sink: WritableSink = this._sink, schema: Schema | null = null) { + + if ((sink === this._sink) || (sink instanceof AsyncByteQueue)) { + this._sink = sink as AsyncByteQueue; + } else { + this._sink = new AsyncByteQueue(); + if (sink && isWritableDOMStream(sink)) { + this.toDOMStream({ type: 'bytes' }).pipeTo(sink); + } else if (sink && isWritableNodeStream(sink)) { + this.toNodeStream({ objectMode: false }).pipe(sink); + } + } + + if (this._started && this._schema) { + this._writeFooter(); + } + + this._started = false; + this._dictionaryBlocks = []; + this._recordBatchBlocks = []; + + if (!schema || (schema !== this._schema)) { + if (schema === null) { + this._position = 0; + this._schema = null; + } else { + this._started = true; + this._schema = schema; + this._writeSchema(schema); + } + } + + return this; + } + + public write(chunk?: Table | RecordBatch | null) { + let schema: Schema | null; + if (!this._sink) { + throw new Error(`RecordBatchWriter is closed`); + } else if (!chunk || !(schema = chunk.schema)) { + return this.finish() && undefined; + } else if (schema !== this._schema) { + if (this._started && this._autoDestroy) { + return this.close(); + } + this.reset(this._sink, schema); + } + (chunk instanceof Table) + ? 
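+            // [editor's note] a Table is unrolled into its RecordBatch chunks, while a
+            // bare batch is written directly; a null/schema-less chunk acts as finish(),
+            // and a schema change either closes (autoDestroy) or resets the stream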
this.writeAll(chunk.chunks) + : this._writeRecordBatch(chunk); + } + + protected _writeMessage(message: Message, alignment = 8) { + + const a = alignment - 1; + const buffer = Message.encode(message); + const flatbufferSize = buffer.byteLength; + const alignedSize = (flatbufferSize + 4 + a) & ~a; + const nPaddingBytes = alignedSize - flatbufferSize - 4; + + if (message.headerType === MessageHeader.RecordBatch) { + this._recordBatchBlocks.push(new FileBlock(alignedSize, message.bodyLength, this._position)); + } else if (message.headerType === MessageHeader.DictionaryBatch) { + this._dictionaryBlocks.push(new FileBlock(alignedSize, message.bodyLength, this._position)); + } + + // Write the flatbuffer size prefix including padding + this._write(Int32Array.of(alignedSize - 4)); + // Write the flatbuffer + if (flatbufferSize > 0) { this._write(buffer); } + // Write any padding + return this._writePadding(nPaddingBytes); + } + + protected _write(chunk: ArrayBufferViewInput) { + if (this._started) { + const buffer = toUint8Array(chunk); + if (buffer && buffer.byteLength > 0) { + this._sink.write(buffer); + this._position += buffer.byteLength; + } + } + return this; + } + + protected _writeSchema(schema: Schema) { + return this + ._writeMessage(Message.from(schema)) + ._writeDictionaries(schema.dictionaryFields); + } + + protected _writeFooter() { + return this._writePadding(4); // eos bytes + } + + protected _writeMagic() { + return this._write(MAGIC); + } + + protected _writePadding(nBytes: number) { + return nBytes > 0 ? this._write(new Uint8Array(nBytes)) : this; + } + + protected _writeRecordBatch(records: RecordBatch) { + const { byteLength, nodes, bufferRegions, buffers } = VectorAssembler.assemble(records); + const recordBatch = new metadata.RecordBatch(records.length, nodes, bufferRegions); + const message = Message.from(recordBatch, byteLength); + return this + ._writeMessage(message) + ._writeBodyBuffers(buffers); + } + + protected _writeDictionaryBatch(dictionary: Vector, id: number, isDelta = false) { + const { byteLength, nodes, bufferRegions, buffers } = VectorAssembler.assemble(dictionary); + const recordBatch = new metadata.RecordBatch(dictionary.length, nodes, bufferRegions); + const dictionaryBatch = new metadata.DictionaryBatch(recordBatch, id, isDelta); + const message = Message.from(dictionaryBatch, byteLength); + return this + ._writeMessage(message) + ._writeBodyBuffers(buffers); + } + + protected _writeBodyBuffers(buffers: ArrayBufferView[]) { + let buffer: ArrayBufferView; + let size: number, padding: number; + for (let i = -1, n = buffers.length; ++i < n;) { + if ((buffer = buffers[i]) && (size = buffer.byteLength) > 0) { + this._write(buffer); + if ((padding = ((size + 7) & ~7) - size) > 0) { + this._writePadding(padding); + } + } + } + return this; + } + + protected _writeDictionaries(dictionaryFields: Map>[]>) { + for (const [id, fields] of dictionaryFields) { + const vector = fields[0].type.dictionaryVector; + if (!(vector instanceof Chunked)) { + this._writeDictionaryBatch(vector, id, false); + } else { + const chunks = vector.chunks; + for (let i = -1, n = chunks.length; ++i < n;) { + this._writeDictionaryBatch(chunks[i], id, i > 0); + } + } + } + return this; + } +} + +/** @ignore */ +export class RecordBatchStreamWriter extends RecordBatchWriter { + + public static writeAll(this: typeof RecordBatchWriter, input: Table | Iterable>, options?: { autoDestroy: true }): RecordBatchStreamWriter; + // @ts-ignore + public static writeAll(this: typeof RecordBatchWriter, 
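+    // [editor's note] the static overloads mirror the instance writeAll:
+    // synchronous input returns the writer, async input a Promise of it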
input: AsyncIterable>, options?: { autoDestroy: true }): Promise>; + public static writeAll(this: typeof RecordBatchWriter, input: PromiseLike>>, options?: { autoDestroy: true }): Promise>; + public static writeAll(this: typeof RecordBatchWriter, input: PromiseLike | Iterable>>, options?: { autoDestroy: true }): Promise>; + /** @nocollapse */ + public static writeAll(this: typeof RecordBatchWriter, input: any, options?: { autoDestroy: true }) { + return new RecordBatchStreamWriter(options).writeAll(input); + } +} + +/** @ignore */ +export class RecordBatchFileWriter extends RecordBatchWriter { + + public static writeAll(this: typeof RecordBatchWriter, input: Table | Iterable>): RecordBatchFileWriter; + // @ts-ignore + public static writeAll(this: typeof RecordBatchWriter, input: AsyncIterable>): Promise>; + public static writeAll(this: typeof RecordBatchWriter, input: PromiseLike>>): Promise>; + public static writeAll(this: typeof RecordBatchWriter, input: PromiseLike | Iterable>>): Promise>; + /** @nocollapse */ + public static writeAll(this: typeof RecordBatchWriter, input: any) { + return new RecordBatchFileWriter().writeAll(input); + } + + constructor() { + super(); + this._autoDestroy = true; + } + + protected _writeSchema(schema: Schema) { + return this + ._writeMagic()._writePadding(2) + ._writeDictionaries(schema.dictionaryFields); + } + + protected _writeFooter() { + const buffer = Footer.encode(new Footer( + this._schema!, MetadataVersion.V4, + this._recordBatchBlocks, this._dictionaryBlocks + )); + return this + ._write(buffer) // Write the flatbuffer + ._write(Int32Array.of(buffer.byteLength)) // then the footer size suffix + ._writeMagic(); // then the magic suffix + } +} + +/** @ignore */ +export class RecordBatchJSONWriter extends RecordBatchWriter { + + public static writeAll(this: typeof RecordBatchWriter, input: Table | Iterable>): RecordBatchJSONWriter; + // @ts-ignore + public static writeAll(this: typeof RecordBatchWriter, input: AsyncIterable>): Promise>; + public static writeAll(this: typeof RecordBatchWriter, input: PromiseLike>>): Promise>; + public static writeAll(this: typeof RecordBatchWriter, input: PromiseLike | Iterable>>): Promise>; + /** @nocollapse */ + public static writeAll(this: typeof RecordBatchWriter, input: any) { + return new RecordBatchJSONWriter().writeAll(input as any); + } + + constructor() { + super(); + this._autoDestroy = true; + } + + protected _writeMessage() { return this; } + protected _writeSchema(schema: Schema) { + return this._write(`{\n "schema": ${ + JSON.stringify({ fields: schema.fields.map(fieldToJSON) }, null, 2) + }`)._writeDictionaries(schema.dictionaryFields); + } + protected _writeDictionaries(dictionaryFields: Map>[]>) { + this._write(`,\n "dictionaries": [\n`); + super._writeDictionaries(dictionaryFields); + return this._write(`\n ]`); + } + protected _writeDictionaryBatch(dictionary: Vector, id: number, isDelta = false) { + this._write(this._dictionaryBlocks.length === 0 ? ` ` : `,\n `); + this._write(`${dictionaryBatchToJSON(this._schema!, dictionary, id, isDelta)}`); + this._dictionaryBlocks.push(new FileBlock(0, 0, 0)); + return this; + } + protected _writeRecordBatch(records: RecordBatch) { + this._write(this._recordBatchBlocks.length === 0 + ? 
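+        // [editor's note] the first record batch opens the "batches" JSON array;
+        // subsequent batches only prepend a separator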
`,\n "batches": [\n ` + : `,\n `); + this._write(`${recordBatchToJSON(records)}`); + this._recordBatchBlocks.push(new FileBlock(0, 0, 0)); + return this; + } + public close() { + if (this._recordBatchBlocks.length > 0) { + this._write(`\n ]`); + } + if (this._schema) { + this._write(`\n}`); + } + return super.close(); + } +} + +/** @ignore */ +function writeAll(writer: RecordBatchWriter, input: Table | Iterable>) { + const chunks = (input instanceof Table) ? input.chunks : input; + for (const batch of chunks) { + writer.write(batch); + } + return writer.finish(); +} + +/** @ignore */ +async function writeAllAsync(writer: RecordBatchWriter, batches: AsyncIterable>) { + for await (const batch of batches) { + writer.write(batch); + } + return writer.finish(); +} + +/** @ignore */ +function fieldToJSON({ name, type, nullable }: Field): object { + const assembler = new JSONTypeAssembler(); + return { + 'name': name, 'nullable': nullable, + 'type': assembler.visit(type), + 'children': (type.children || []).map(fieldToJSON), + 'dictionary': !DataType.isDictionary(type) ? undefined : { + 'id': type.id, + 'isOrdered': type.isOrdered, + 'indexType': assembler.visit(type.indices) + } + }; +} + +/** @ignore */ +function dictionaryBatchToJSON(schema: Schema, dictionary: Vector, id: number, isDelta = false) { + const f = schema.dictionaryFields.get(id)![0]; + const field = new Field(f.name, f.type.dictionary, f.nullable, f.metadata); + const columns = JSONVectorAssembler.assemble(new Column(field, [dictionary])); + return JSON.stringify({ + 'id': id, + 'isDelta': isDelta, + 'data': { + 'count': dictionary.length, + 'columns': columns + } + }, null, 2); +} + +/** @ignore */ +function recordBatchToJSON(records: RecordBatch) { + return JSON.stringify({ + 'count': records.length, + 'columns': JSONVectorAssembler.assemble(records) + }, null, 2); +} diff --git a/js/src/ipc/writer/binary.ts b/js/src/ipc/writer/binary.ts deleted file mode 100644 index df7c586d94ab5..0000000000000 --- a/js/src/ipc/writer/binary.ts +++ /dev/null @@ -1,725 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
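[Editorial note] Everything below deletes the old hand-rolled IPC serializer; its `serializeStream`/`serializeFile` generators are replaced by the `RecordBatchStreamWriter`/`RecordBatchFileWriter` added above. A hedged migration sketch:

    // before: collect the generator's chunks yourself
    //   const chunks = [...serializeFile(table)];
    // after: one call through the new writer API
    const bytes = await RecordBatchFileWriter.writeAll(table).toUint8Array();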
- -import { Table } from '../../table'; -import { DenseUnionData } from '../../data'; -import { RecordBatch } from '../../recordbatch'; -import { VectorVisitor, TypeVisitor } from '../../visitor'; -import { MAGIC, magicLength, magicAndPadding, PADDING } from '../magic'; -import { align, getBool, packBools, iterateBits } from '../../util/bit'; -import { Vector, UnionVector, DictionaryVector, NestedVector, ListVector } from '../../vector'; -import { BufferMetadata, FieldMetadata, Footer, FileBlock, Message, RecordBatchMetadata, DictionaryBatch } from '../metadata'; -import { - Schema, Field, TypedArray, MetadataVersion, - DataType, - Dictionary, - Null, Int, Float, - Binary, Bool, Utf8, Decimal, - Date_, Time, Timestamp, Interval, - List, Struct, Union, FixedSizeBinary, FixedSizeList, Map_, - FlatType, FlatListType, NestedType, UnionMode, SparseUnion, DenseUnion, SingleNestedType, -} from '../../type'; - -export function* serializeStream(table: Table) { - yield serializeMessage(table.schema).buffer; - for (const [id, field] of table.schema.dictionaries) { - const vec = table.getColumn(field.name) as any as DictionaryVector; - if (vec && vec.dictionary) { - yield serializeDictionaryBatch(vec.dictionary, id).buffer; - } - } - for (const recordBatch of table.batches) { - yield serializeRecordBatch(recordBatch).buffer; - } -} - -export function* serializeFile(table: Table) { - - const recordBatches = []; - const dictionaryBatches = []; - - // First yield the magic string (aligned) - let buffer = new Uint8Array(align(magicLength, 8)); - let metadataLength, bodyLength, byteLength = buffer.byteLength; - buffer.set(MAGIC, 0); - yield buffer; - - // Then yield the schema - ({ metadataLength, buffer } = serializeMessage(table.schema)); - byteLength += buffer.byteLength; - yield buffer; - - for (const [id, field] of table.schema.dictionaries) { - const vec = table.getColumn(field.name) as any as DictionaryVector; - if (vec && vec.dictionary) { - ({ metadataLength, bodyLength, buffer } = serializeDictionaryBatch(vec.dictionary, id)); - dictionaryBatches.push(new FileBlock(metadataLength, bodyLength, byteLength)); - byteLength += buffer.byteLength; - yield buffer; - } - } - for (const recordBatch of table.batches) { - ({ metadataLength, bodyLength, buffer } = serializeRecordBatch(recordBatch)); - recordBatches.push(new FileBlock(metadataLength, bodyLength, byteLength)); - byteLength += buffer.byteLength; - yield buffer; - } - - // Then yield the footer metadata (not aligned) - ({ metadataLength, buffer } = serializeFooter(new Footer(dictionaryBatches, recordBatches, table.schema))); - yield buffer; - - // Last, yield the footer length + terminating magic arrow string (aligned) - buffer = new Uint8Array(magicAndPadding); - new DataView(buffer.buffer).setInt32(0, metadataLength, platformIsLittleEndian); - buffer.set(MAGIC, buffer.byteLength - magicLength); - yield buffer; -} - -export function serializeRecordBatch(recordBatch: RecordBatch) { - const { byteLength, fieldNodes, buffers, buffersMeta } = new RecordBatchSerializer().visitRecordBatch(recordBatch); - const rbMeta = new RecordBatchMetadata(MetadataVersion.V4, recordBatch.length, fieldNodes, buffersMeta); - const rbData = concatBuffersWithMetadata(byteLength, buffers, buffersMeta); - return serializeMessage(rbMeta, rbData); -} - -export function serializeDictionaryBatch(dictionary: Vector, id: Long | number, isDelta: boolean = false) { - const { byteLength, fieldNodes, buffers, buffersMeta } = new 
RecordBatchSerializer().visitRecordBatch(RecordBatch.from([dictionary])); - const rbMeta = new RecordBatchMetadata(MetadataVersion.V4, dictionary.length, fieldNodes, buffersMeta); - const dbMeta = new DictionaryBatch(MetadataVersion.V4, rbMeta, id, isDelta); - const rbData = concatBuffersWithMetadata(byteLength, buffers, buffersMeta); - return serializeMessage(dbMeta, rbData); -} - -export function serializeMessage(message: Message, data?: Uint8Array) { - const b = new Builder(); - _Message.finishMessageBuffer(b, writeMessage(b, message)); - // Slice out the buffer that contains the message metadata - const metadataBytes = b.asUint8Array(); - // Reserve 4 bytes for writing the message size at the front. - // Metadata length includes the metadata byteLength + the 4 - // bytes for the length, and rounded up to the nearest 8 bytes. - const metadataLength = align(PADDING + metadataBytes.byteLength, 8); - // + the length of the optional data buffer at the end, padded - const dataByteLength = data ? data.byteLength : 0; - // ensure the entire message is aligned to an 8-byte boundary - const messageBytes = new Uint8Array(align(metadataLength + dataByteLength, 8)); - // Write the metadata length into the first 4 bytes, but subtract the - // bytes we use to hold the length itself. - new DataView(messageBytes.buffer).setInt32(0, metadataLength - PADDING, platformIsLittleEndian); - // Copy the metadata bytes into the message buffer - messageBytes.set(metadataBytes, PADDING); - // Copy the optional data buffer after the metadata bytes - (data && dataByteLength > 0) && messageBytes.set(data, metadataLength); - // if (messageBytes.byteLength % 8 !== 0) { debugger; } - // Return the metadata length because we need to write it into each FileBlock also - return { metadataLength, bodyLength: message.bodyLength, buffer: messageBytes }; -} - -export function serializeFooter(footer: Footer) { - const b = new Builder(); - _Footer.finishFooterBuffer(b, writeFooter(b, footer)); - // Slice out the buffer that contains the footer metadata - const footerBytes = b.asUint8Array(); - const metadataLength = footerBytes.byteLength; - return { metadataLength, buffer: footerBytes }; -} - -export class RecordBatchSerializer extends VectorVisitor { - public byteLength = 0; - public buffers: TypedArray[] = []; - public fieldNodes: FieldMetadata[] = []; - public buffersMeta: BufferMetadata[] = []; - public visitRecordBatch(recordBatch: RecordBatch) { - this.buffers = []; - this.byteLength = 0; - this.fieldNodes = []; - this.buffersMeta = []; - for (let vector: Vector, index = -1, numCols = recordBatch.numCols; ++index < numCols;) { - if (vector = recordBatch.getChildAt(index)!) { - this.visit(vector); - } - } - return this; - } - public visit(vector: Vector) { - if (!DataType.isDictionary(vector.type)) { - const { data, length, nullCount } = vector; - if (length > 2147483647) { - throw new RangeError('Cannot write arrays larger than 2^31 - 1 in length'); - } - this.fieldNodes.push(new FieldMetadata(length, nullCount)); - this.addBuffer(nullCount <= 0 - ? new Uint8Array(0) // placeholder validity buffer - : this.getTruncatedBitmap(data.offset, length, data.nullBitmap!) 
- ); - } - return super.visit(vector); - } - public visitNull (_nullz: Vector) { return this; } - public visitBool (vector: Vector) { return this.visitBoolVector(vector); } - public visitInt (vector: Vector) { return this.visitFlatVector(vector); } - public visitFloat (vector: Vector) { return this.visitFlatVector(vector); } - public visitUtf8 (vector: Vector) { return this.visitFlatListVector(vector); } - public visitBinary (vector: Vector) { return this.visitFlatListVector(vector); } - public visitDate (vector: Vector) { return this.visitFlatVector(vector); } - public visitTimestamp (vector: Vector) { return this.visitFlatVector(vector); } - public visitTime (vector: Vector